LCOV - code coverage report
Current view: top level - utils/pilgrim - parutilitiesmodule.F90 (source / functions) Hit Total Coverage
Test: coverage.info Lines: 569 1284 44.3 %
Date: 2025-03-14 01:30:37 Functions: 14 51 27.5 %

          Line data    Source code
       1             : #if !defined(STAND_ALONE)
       2             : #endif
       3             : #define _SMEMORY 1
       4             : !-----------------------------------------------------------------------
       5             : !         Nasa/GSFC, Data Assimilation Office, Code 910.3, GEOS/DAS
       6             : !-----------------------------------------------------------------------
       7             :       MODULE parutilitiesmodule
       8             : #if defined( SPMD )
       9             : !BOP
      10             : !
      11             : ! !MODULE: parutilitiesmodule
      12             : !
      13             : ! !USES:
      14             : #if defined( STAND_ALONE )
      15             : # define iulog 6
      16             : #else
      17             :       use cam_logfile, only: iulog
      18             : #endif
      19             : #if !defined(STAND_ALONE)
      20             :       USE shr_kind_mod, only: r8 => shr_kind_r8, i8 => shr_kind_i8, &
      21             :                               r4 => shr_kind_r4
      22             : #endif
      23             :       USE mod_comm, ONLY : commglobal, gid, numpro, blockdescriptor, max_nparcels
      24             : #include "debug.h"
      25             :       IMPLICIT NONE
      26             : #include "mpif.h"
      27             : #include "pilgrim.h"
      28             : 
      29             : !
      30             : ! !PUBLIC DATA MEMBERS:
      31             :       PUBLIC     Gsize
      32             :       PUBLIC     INT4, REAL4, REAL8
      33             :       PUBLIC     SUMOP, MAXOP, MINOP, BCSTOP
      34             :        
      35             : 
      36             :       INTEGER,SAVE :: GSize        ! Size of communicator CommGlobal
      37             :                                    ! Equivalent to mod_comm::numpro
      38             : #define CPP_SUM_OP 101
      39             : #define CPP_MAX_OP 102
      40             : #define CPP_MIN_OP 103
      41             : #define CPP_BCST_OP 104
      42             : 
      43             :       INTEGER,SAVE :: INT4  = MPI_INTEGER
      44             :       INTEGER,SAVE :: REAL4 = MPI_REAL
      45             :       INTEGER,SAVE :: REAL8 = MPI_DOUBLE_PRECISION
      46             :       INTEGER,SAVE :: SUMOP = MPI_SUM
      47             :       INTEGER,SAVE :: MAXOP = MPI_MAX
      48             :       INTEGER,SAVE :: MINOP = MPI_MIN
      49             :       INTEGER,SAVE :: BCSTOP = CPP_BCST_OP
      50             : 
      51             : ! !PUBLIC MEMBER FUNCTIONS:
      52             :       PUBLIC ParPatternType
      53             : 
      54             :       TYPE ParPatternType
      55             :         INTEGER ::     Comm                  ! Communicator
      56             :         INTEGER ::     Iam                   ! My rank in communicator
      57             :         INTEGER ::     Size                  ! Size of communicator
      58             :         TYPE(BlockDescriptor), POINTER :: SendDesc(:) ! Array of descriptors
      59             :         TYPE(BlockDescriptor), POINTER :: RecvDesc(:) ! Array of descriptors
      60             :       END TYPE ParPatternType 
      61             : 
      62             : 
      63             : #ifdef _SMEMORY
      64             :       TYPE ParInfoType
      65             :         INTEGER :: numRecvSeg               ! number of received segments
      66             :         INTEGER :: numSendSeg               ! number of send segments
      67             :         INTEGER :: maxNumSeg                ! maximum number of segments over all processors
      68             :         INTEGER :: numRecvNeigh             ! number of receive neighbors
      69             :         INTEGER :: numSendNeigh             ! number of send neighbors
      70             :       END TYPE ParInfoType
      71             : #endif
      72             : 
      73             :       PUBLIC     ParInit, ParSplit, ParFree, ParExit
      74             :       PUBLIC     ParScatter, ParGather
      75             :       PUBLIC     ParBeginTransfer, ParEndTransfer
      76             :       PUBLIC     ParExchangeVector, ParCollective
      77             :       PUBLIC     ParPatternCreate, ParPatternFree
      78             : 
      79             :       INTERFACE     ParPatternCreate
      80             :         MODULE PROCEDURE ParPatternCopy
      81             :         MODULE PROCEDURE ParPatternGhost
      82             :         MODULE PROCEDURE ParPatternDecompToDecomp
      83             :         MODULE PROCEDURE ParPatternDecompToGhost
      84             :         MODULE PROCEDURE ParPatternGhostToDecomp
      85             :         MODULE PROCEDURE ParPatternGhostToGhost
      86             :       END INTERFACE
      87             :  
      88             :       INTERFACE     ParScatter
      89             :         MODULE PROCEDURE ParScatterReal
      90             :         MODULE PROCEDURE ParScatterReal4
      91             :         MODULE PROCEDURE ParScatterInt
      92             :       END INTERFACE
      93             :  
      94             :       INTERFACE     ParGather
      95             :         MODULE PROCEDURE ParGatherReal
      96             :         MODULE PROCEDURE ParGatherReal4
      97             :         MODULE PROCEDURE ParGatherInt
      98             :       END INTERFACE
      99             : 
     100             :       INTERFACE     ParBeginTransfer
     101             :         MODULE PROCEDURE ParBeginTransferReal
     102             :         MODULE PROCEDURE ParBeginTransferPattern1D
     103             :         MODULE PROCEDURE ParBeginTransferPattern1Dint
     104             :         MODULE PROCEDURE ParBeginTransferPattern2D
     105             :         MODULE PROCEDURE ParBeginTransferPattern3D
     106             :         MODULE PROCEDURE ParBeginTransferPattern4D
     107             : !        MODULE PROCEDURE ParBeginTransferInt
     108             :       END INTERFACE
     109             : 
     110             :       INTERFACE     ParEndTransfer
     111             :         MODULE PROCEDURE ParEndTransferReal
     112             :         MODULE PROCEDURE ParEndTransferPattern1D
     113             :         MODULE PROCEDURE ParEndTransferPattern1Dint
     114             :         MODULE PROCEDURE ParEndTransferPattern2D
     115             :         MODULE PROCEDURE ParEndTransferPattern3D
     116             :         MODULE PROCEDURE ParEndTransferPattern4D
     117             : !        MODULE PROCEDURE ParEndTransferInt
     118             :       END INTERFACE
     119             : 
     120             :       INTERFACE     ParExchangeVector
     121             :         MODULE PROCEDURE ParExchangeVectorReal
     122             :         MODULE PROCEDURE ParExchangeVectorReal4
     123             :         MODULE PROCEDURE ParExchangeVectorInt
     124             :       END INTERFACE
     125             : 
     126             :       INTERFACE     ParCollective
     127             :         MODULE PROCEDURE ParCollectiveBarrier
     128             :         MODULE PROCEDURE ParCollective0D
     129             :         MODULE PROCEDURE ParCollective1D
     130             :         MODULE PROCEDURE ParCollective1DReal4
     131             :         MODULE PROCEDURE ParCollective2D
     132             :         MODULE PROCEDURE ParCollective2DReal4
     133             :         MODULE PROCEDURE ParCollective3D
     134             :         MODULE PROCEDURE ParCollective0DInt
     135             :         MODULE PROCEDURE ParCollective0DStr
     136             :         MODULE PROCEDURE ParCollective1DInt
     137             :         MODULE PROCEDURE ParCollective1DStr
     138             :         MODULE PROCEDURE ParCollective2DInt
     139             :       END INTERFACE
     140             : 
     141             : #ifdef _SMEMORY
     142             :       INTERFACE   ParCalcInfo
     143             :         MODULE PROCEDURE ParCalcInfoDecompToGhost
     144             :         MODULE PROCEDURE ParCalcInfoDecompToDecomp
     145             :         MODULE PROCEDURE ParCalcInfoGhostToGhost
     146             :         MODULE PROCEDURE ParCalcInfoGhostToDecomp
     147             :       END INTERFACE
     148             : #endif
     149             : 
     150             : !
     151             : ! !DESCRIPTION:
     152             : !
     153             : !      This module provides the basic utilities to support parallelism
     154             : !      on a distributed or shared memory multiprocessor.
     155             : !
     156             : !      \begin{center}
     157             : !      \begin{tabular}{|l|l|} \hline \hline
     158             : !        ParInit           & Initialize the parallel system \\ \hline
     159             : !        ParExit           & Exit from the parallel system \\ \hline
     160             : !        ParSplit          & Create a Compute grid of PEs   \\ \hline
     161             : !        ParFree           & Free a split communicator \\ \hline
     162             : !        ParScatter        & Scatter global slice to local slices \\ \hline
     163             : !        ParGather         & Gather local slices to one global \\ \hline
     164             : !        ParBeginTransfer  & Initiate an all-to-all packet transfer \\ \hline
     165             : !        ParEndTransfer    & Complete an all-to-all packet transfer \\ \hline
     166             : !        ParExchangeVector & Complete an all-to-all packet transfer \\ \hline
     167             : !        ParCollective     & Collective operation across communicator \\ \hline
     168             : !      \end{tabular}
     169             : !      \end{center}
     170             : !      \vspace{2mm}
     171             : !
     172             : !      Other utilities can be added to this module as needs evolve.
     173             : !
     174             : !      Conceptually the intention is to aggregate as many of the
     175             : !      MPI communication calls as possible into a well-maintained
     176             : !      module.  This will help avoid the occurrence of MPI spaghetti 
     177             : !      code.  
     178             : !
     179             : !      This module is tailored to GEOS DAS and implements the 
     180             : !      design of Lucchesi/Mirin/Sawyer/Larson.
     181             : !
     182             : ! !REVISION HISTORY:
     183             : !   97.02.01   Sawyer     Creation
     184             : !   97.07.22   Sawyer     Removal of DecompType related subroutines
     185             : !   97.08.13   Sawyer     Added ParScatter/Gather for Integers
     186             : !   97.09.26   Sawyer     Additions of Sparse communication primitives
     187             : !   97.12.01   Sawyer     Changed all MPI_SSEND to MPI_ISEND
     188             : !   97.12.23   Lucchesi   Added member variables IsIONode and InterComm
     189             : !   98.01.06   Sawyer     Additions from RL for I/O Nodes
     190             : !   98.02.02   Sawyer     Added the Cartesian data members
     191             : !   98.02.05   Sawyer     Removed the use of intercommunicators
     192             : !   98.02.23   Sawyer     Added ghosting utilities
     193             : !   98.02.25   Sawyer     Modified interface of BeginTransfer
     194             : !   98.03.03   Sawyer     Added Global ID number to public data members
     195             : !   98.03.25   Sawyer     Added documentation for walkthrough
     196             : !   98.04.16   Sawyer     Removed all use of MPI_CART (CommRow redefined)
     197             : !   98.07.23   Sawyer     Added ParGhost, ParPoleDot; ParBegin/EndGhost out
     198             : !   98.09.15   Sawyer     Added ParMerge, ParPoleGhost
     199             : !   98.09.17   Sawyer     Added ParSum, removed ParPoleDot
     200             : !   99.01.18   Sawyer     Minor cleaning
     201             : !   99.03.04   Sawyer     Revised SHMEM concept for Transfer
     202             : !   99.04.22   Sawyer     Removed COMMON for handles -- they are
     203             : !                         always used in same program unit.
     204             : !   99.05.21   Sawyer     Reintroduced barriers in Scatter/Gather
     205             : !   99.06.03   Sawyer     USE_SHMEM revisions
     206             : !   99.12.10   Sawyer     ParInit now sets GID, Gsize
     207             : !   99.12.13   Sawyer     Version slimmed down for FVCCM release
     208             : !   00.06.14   Sawyer     Precision module now used
     209             : !   00.07.07   Sawyer     Removed 2D scatter/gather; simplified API
     210             : !   00.07.30   Sawyer     Full implementation with shared memory
     211             : !   00.08.09   Sawyer     Replaced ParSum with ParCollective
     212             : !   00.08.28   Sawyer     Moved LLNL 2D data to LLNL2DModule; new MLP impl
     213             : !   01.02.04   Sawyer     Added PatternType and related routines
     214             : !   01.02.12   Sawyer     Converted to free format
     215             : !   02.10.30   Sawyer     Welded with mod_comm
     216             : !   03.03.06   Sawyer     Fix parpatterncreate for MPI2; use MPI_DATATYPE_NULL
     217             : !   05.10.12   Worley     Support for vectorization modifications in mod_comm
     218             : !   06.03.01   Sawyer     Merged CAM and GEOS5 versions
     219             : !   07.01.05   Mirin      Eliminated direct use of Gsize
     220             : !   07.09.04   Dennis     Reduced temporary memory usage
     221             : !
     222             : ! !BUGS:
     223             : !   There are several MPI_Barriers at locations in the code.
     224             : !   These avoid potential race conditions which probably only occur
     225             : !   if the number of real processors is less than the number of
     226             : !   message passing processes.  Remove these barriers at your own risk
     227             : !
     228             : !EOP
     229             : 
     230             :       INTEGER, SAVE :: InHandle(MAX_PAX, MAX_SMP, MAX_TRF)
     231             :       INTEGER, SAVE :: OutHandle(MAX_PAX,MAX_SMP, MAX_TRF)
     232             :       INTEGER, SAVE :: BegTrf = 0  ! Ongoing overlapped begintransfer # 
     233             :       INTEGER, SAVE :: EndTrf = 0  ! Ongoing overlapped endtransfer #
     234             :       LOGICAL, SAVE :: Initialized = .FALSE. ! Flag for initialization of parutilitiesmodule.
     235             : 
     236             :       CONTAINS
     237             : !-----------------------------------------------------------------------
     238             : 
     239             : 
     240             : !-----------------------------------------------------------------------
     241             : !BOP
     242             : ! !IROUTINE: ParInit --- Initialize the parallel execution
     243             : !
     244             : ! !INTERFACE: 
      245        1536 :       SUBROUTINE ParInit ( Comm, npryzxy, mod_method, mod_geopk, mod_gatscat, mod_maxirr )
      246             : !
      247             : ! !USES:
      248             :       USE mod_comm, ONLY : mp_init
      249             :       IMPLICIT NONE
      250             : ! !INPUT PARAMETERS:
      251             :       INTEGER, OPTIONAL  :: Comm
      252             :       INTEGER, OPTIONAL, INTENT(IN) :: npryzxy(4)      ! 2D decompositions
      253             :       INTEGER, OPTIONAL, INTENT(IN) :: mod_method      ! CAM optimization
      254             :       INTEGER, OPTIONAL, INTENT(IN) :: mod_geopk       ! CAM optimization
      255             :       INTEGER, OPTIONAL, INTENT(IN) :: mod_gatscat     ! CAM optimization
      256             :       INTEGER, OPTIONAL, INTENT(IN) :: mod_maxirr      ! CAM max simul. trsps.
      257             : 
      258             : !
      259             : ! !DESCRIPTION:
      260             : !     Initializes the system.  In MPI mode, call MPI\_INIT if not done 
      261             : !     already. If the optional arguments are not provided, default 
      262             : !     values will be chosen.  But it is advisable to provide COMM
      263             : !     (main communicator) and NPRYZXY (internal 2D decomposition).
      264             : !
      265             : ! !SYSTEM ROUTINES:
      266             : !     MPI_INITIALIZED, MPI_INIT
      267             : !
      268             : ! !REVISION HISTORY:
      269             : !   97.03.20   Sawyer     Creation
      270             : !   97.04.16   Sawyer     Cleaned up for walk-through
      271             : !   97.07.03   Sawyer     Reformulated documentation
      272             : !   00.07.23   Sawyer     Added shared memory arena implementation
      273             : !   02.10.30   Sawyer     Now uses mp_init from mod_comm
      274             : !   06.06.15   Sawyer     Added CAM optimizations (passed to mod_comm)
      275             : !
      276             : !EOP
      277             : !-----------------------------------------------------------------------
      278             : !BOC
      279             : 
      280             : ! Initialize mod_comm
      281             : 
                      : ! NOTE(review): guarded by the module-level Initialized flag, so this
                      : ! routine is idempotent -- only the very first call reaches mp_init,
                      : ! copies numpro into Gsize, and sets the flag; later calls are no-ops.
                      : ! NOTE(review): all optional arguments are forwarded to mp_init without
                      : ! present() checks here -- presumably mp_init declares them OPTIONAL too;
                      : ! confirm against mod_comm.
      282        1536 :       IF (.NOT. Initialized) THEN
      283        1536 :          CALL mp_init( Comm, npryzxy, mod_method, mod_geopk, mod_gatscat, mod_maxirr )
      284        1536 :          Gsize = numpro   !   Support PILGRIM's Gsize for now
      285        1536 :          Initialized = .TRUE.
      286             :       ENDIF
      287             : 
      288        1536 :       RETURN
      289             : !EOC
      290             :       END SUBROUTINE ParInit
     291             : !-----------------------------------------------------------------------
     292             : 
     293             : 
     294             : !-----------------------------------------------------------------------
     295             : !BOP
     296             : ! !IROUTINE: ParExit --- Finalize the parallel execution
     297             : !
     298             : ! !INTERFACE:
      299           0 :       SUBROUTINE ParExit ( Comm )
      300             : 
      301             : ! !USES:
      302             :       USE mod_comm, ONLY: mp_exit
      303             :       IMPLICIT NONE
      304             : 
      305             : ! !INPUT PARAMETERS:
      306             :       INTEGER, OPTIONAL  :: Comm
      307             : 
      308             : ! !DESCRIPTION:
      309             : !     All PEs, compute nodes and IO nodes alike meet here to terminate
      310             : !     themselves.  If someone does not check in, everything will hang
      311             : !     here.
      312             : !
      313             : !     This routine is the very {\em last} thing which is executed!
      314             : !
      315             : ! !LOCAL VARIABLES:
      316             :       INTEGER Ierror
      317             : !
      318             : ! !SYSTEM ROUTINES:
      319             : !     MPI_BARRIER, MPI_FINALIZE
      320             : !
      321             : ! !REVISION HISTORY:
      322             : !   97.03.20   Sawyer     Creation
      323             : !   97.04.16   Sawyer     Cleaned up for walk-through
      324             : !   97.07.03   Sawyer     Reformulated documentation
      325             : !   00.07.23   Sawyer     Added shared memory arena implementation
      326             : !   02.08.13   Sawyer     Incorporated mod_comm for low level comm.
      327             : !
      328             : !EOP
      329             : !-----------------------------------------------------------------------
      330             : !BOC
                      : ! All shutdown work is delegated to mod_comm::mp_exit; nothing else
                      : ! happens here.  NOTE(review): the local Ierror is declared but never
                      : ! used in this body -- it appears to be a leftover.
      331           0 :       CALL mp_exit(Comm)
      332           0 :       RETURN
      333             : !EOC
      334             :       END SUBROUTINE ParExit
     335             : !-----------------------------------------------------------------------
     336             : 
     337             : 
     338             : !-----------------------------------------------------------------------
     339             : !BOP
     340             : ! !IROUTINE:   ParSplit --- Split into group for I/O and computation
     341             : !
     342             : ! !INTERFACE:
      343        6144 :       SUBROUTINE ParSplit( InComm, Color, InID, Comm, MyID, Nprocs )
      344             : !
      345             : ! !USES:
      346             :       IMPLICIT NONE
      347             : 
      348             : ! !INPUT PARAMETERS:
      349             :       INTEGER, INTENT( IN )     :: InComm    ! Communicator to split
      350             :       INTEGER, INTENT( IN )     :: Color     ! Group label
      351             :       INTEGER, INTENT( IN )     :: InID      ! Input ID
      352             : 
      353             : ! !OUTPUT PARAMETERS:
      354             :       INTEGER, INTENT( OUT )    :: Comm      ! Split communicator
      355             :       INTEGER, INTENT( OUT )    :: MyID      ! Group label
      356             :       INTEGER, INTENT( OUT )    :: Nprocs    ! Number of PEs in my group
      357             : !
      358             : ! !DESCRIPTION:
      359             : !     This routine splits the PEs into groups.  This is currently only
      360             : !     supported in MPI mode. Read the chapter on MPI\_COMM\_SPLIT 
      361             : !     thoroughly.  
      362             : !
      363             : ! !SYSTEM ROUTINES:
      364             : !     MPI_COMM_SPLIT, MPI_COMM_SIZE, MPI_COMM_RANK
      365             : !
      366             : ! !REVISION HISTORY:
      367             : !   97.03.20   Sawyer     Creation
      368             : !   97.04.16   Sawyer     Cleaned up for walk-through
      369             : !   97.07.03   Sawyer     Reformulated documentation
      370             : !   97.12.01   Sawyer     Xnodes and Ynodes are explicit arguments
      371             : !   97.12.23   Lucchesi   Added call to MPI_INTERCOMM_CREATE
      372             : !   98.01.06   Sawyer     Additions from RL for I/O Nodes
      373             : !   98.02.02   Sawyer     Added the Cartesian information
      374             : !   98.02.05   Sawyer     Removed the use of intercommunicators
      375             : !   98.04.16   Sawyer     Removed all use of MPI_CART (CommRow redefined)
      376             : !   99.01.10   Sawyer     CommRow now defined for all rows
      377             : !   00.07.09   Sawyer     Removed 2D computational mesh
      378             : !   00.08.08   Sawyer     Redefined as wrapper to mpi_comm_split
      379             : !
      380             : !EOP
      381             : !-----------------------------------------------------------------------
      382             : !BOC
      383             : ! !LOCAL VARIABLES:
      384             :       INTEGER  Ierror
      385             : 
      386             :       CPP_ENTER_PROCEDURE( "PARSPLIT" )
      387             : !
      388             : !     Split the communicators
      389             : !
                      : ! NOTE(review): Ierror returned by the three MPI calls below is never
                      : ! inspected; errors surface only through the MPI error handler.
      390        6144 :       CALL MPI_COMM_SPLIT( InComm, Color, InID, Comm, Ierror )
      391        6144 :       IF ( Comm .ne. MPI_COMM_NULL ) THEN
      392        6144 :         CALL MPI_COMM_RANK( Comm, MyID, Ierror )
      393        6144 :         CALL MPI_COMM_SIZE( Comm, Nprocs, Ierror )
      394             :       ELSE
      395             : !
      396             : !     This PE does not participate: mark with impossible values
      397             : !
                      : ! Callers can test MyID/Nprocs == -1 to detect non-participation.
      398           0 :         MyID = -1
      399           0 :         Nprocs = -1
      400             :       ENDIF
      401             : 
      402             :       CPP_LEAVE_PROCEDURE( "PARSPLIT" )
      403        6144 :       RETURN
      404             : !EOC
      405             :       END SUBROUTINE ParSplit
     406             : !-----------------------------------------------------------------------
     407             : 
     408             : 
     409             : !-----------------------------------------------------------------------
     410             : !BOP
     411             : ! !IROUTINE:   ParFree --- Free a communicator
     412             : !
     413             : ! !INTERFACE:
      414           0 :       SUBROUTINE ParFree( InComm ) 
      415             : !
      416             : ! !USES:
      417             :       IMPLICIT NONE
      418             : 
      419             : ! !INPUT PARAMETERS:
      420             :       INTEGER InComm
      421             : 
      422             : !
      423             : ! !DESCRIPTION:
      424             : !     This routine frees a communicator created with ParSplit
      425             : !
      426             : ! !REVISION HISTORY:
      427             : !   97.09.11   Sawyer     Creation, to complement ParSplit
      428             : !   00.07.24   Sawyer     Revamped ParMerge into a free communicator 
      429             : !
      430             : ! !LOCAL VARIABLES:
      431             :       INTEGER  Ierror
      432             : !
      433             : !EOP
      434             : !-----------------------------------------------------------------------
      435             : !BOC
      436             :       CPP_ENTER_PROCEDURE( "PARFREE" )
      437             : !
                      : ! NOTE(review): per the MPI standard, MPI_COMM_FREE sets its handle
                      : ! argument to MPI_COMM_NULL, so InComm is modified in place -- it is
                      : ! declared without an intent here, but INTENT(INOUT) would document
                      : ! this accurately.  Ierror is not checked.
      438           0 :       CALL MPI_COMM_FREE( InComm, Ierror ) 
      439             :       CPP_LEAVE_PROCEDURE( "PARFREE" )
      440           0 :       RETURN
      441             : !EOC
      442             :       END SUBROUTINE ParFree
     443             : !-----------------------------------------------------------------------
     444             : 
     445             : 
     446             : !-----------------------------------------------------------------------
     447             : !BOP
     448             : ! !IROUTINE:   ParPatternCopy --- Duplicate/replicate a comm pattern
     449             : !
     450             : ! !INTERFACE:
     451       12288 :       SUBROUTINE ParPatternCopy( InComm, PatternIn, PatternOut, Multiplicity )
     452             : !
     453             : ! !USES:
     454             :       USE mod_comm, ONLY : get_partneroffset
     455             :       IMPLICIT NONE
     456             : 
     457             : ! !INPUT PARAMETERS:
     458             :       INTEGER,  INTENT( IN )               :: InComm  ! # of PEs
     459             :       TYPE(ParPatternType), INTENT( IN )   :: PatternIn   ! Comm Pattern
     460             :       INTEGER, INTENT( IN ),  OPTIONAL     :: Multiplicity
     461             : 
     462             : ! !OUTPUT PARAMETERS:
     463             :       TYPE(ParPatternType), INTENT( OUT )  :: PatternOut  ! Comm Pattern
     464             : !
     465             : ! !DESCRIPTION:
     466             : !     This routine duplicates a given communication pattern. 
     467             : !
     468             : !     Optionally a multiplicity can be added.  This replicates the
     469             : !     communication pattern Mult times, that is for the case that
     470             : !     the data structures are replicated in the final dimension
     471             : !     Mult times.  A typical example is a pattern describing a 2D
     472             : !     array, e.g. a a lat-lon decomposition, which will be used
     473             : !     to copy a 3D lat-lon-lev array.  The strides (e.g. the number
     474             : !     of elements in one plane) of the source (send) and target 
     475             : !     (recv) arrays are now calculated internally.
     476             : !
     477             : ! !SYSTEM ROUTINES:
     478             : !     MPI_TYPE_UB, MPI_TYPE_HVECTOR, MPI_TYPE_COMMIT
     479             : !
     480             : ! !REVISION HISTORY:
     481             : !   03.03.20   Sawyer     Creation
     482             : !   03.06.26   Sawyer     Removed StrideSend/Recv from API
     483             : !
     484             : !EOP 
     485             : !-----------------------------------------------------------------------
     486             : !BOC
     487             : ! !LOCAL VARIABLES:
     488             :       INTEGER  Stride_S, Stride_R, Mult, Iam, GroupSize, Ipe, Ierror
     489             :       INTEGER  Disp, Length, I, J, ub, method
     490             : 
     491             :       CPP_ENTER_PROCEDURE( "PARPATTERNCOPY" )
     492             : 
     493       12288 :       method = PatternIn%RecvDesc(1)%method
     494             : 
     495             : !
     496             : ! Decide if this is a simple copy, or a multiple replication
     497             : !
     498       12288 :       IF ( present(Multiplicity) ) THEN
     499       12288 :           Mult = Multiplicity
     500             :       ELSE
     501           0 :           Mult = 1
     502             :       ENDIF
     503             : 
     504       12288 :       CALL MPI_COMM_DUP( PatternIn%Comm, PatternOut%Comm, Ierror )
     505       12288 :       CALL MPI_COMM_SIZE( PatternIn%Comm, GroupSize, Ierror )
     506       12288 :       CALL MPI_COMM_RANK( PatternIn%Comm, Iam, Ierror )
     507             : 
     508       12288 :       PatternOut%Iam  = Iam
     509       12288 :       PatternOut%Size = GroupSize
     510             : 
     511       36864 :       ALLOCATE( PatternOut%SendDesc( GroupSize ) )
     512       24576 :       ALLOCATE( PatternOut%RecvDesc( GroupSize ) )
     513             : 
     514     9449472 :       PatternOut%SendDesc(:)%method = PatternIn%SendDesc(:)%method
     515     9449472 :       PatternOut%RecvDesc(:)%method = PatternIn%RecvDesc(:)%method
     516             : !
     517             : ! Determine the strides which are by construction the maximum upper
     518             : ! bound of all the derived types.  This is due to the fact that
     519             : ! there are no 'holes' in the data types: even if one PE does not
     520             : ! send to any other PEs, it will still have a data type for 'sending'
     521             : ! data to itself.
     522             : !
     523       12288 :         Stride_S = 0
     524       12288 :         Stride_R = 0       
     525     9449472 :         DO Ipe=1, GroupSize
     526     9437184 :           IF ( PatternIn%SendDesc(Ipe)%type /= MPI_DATATYPE_NULL ) THEN
     527             :             CALL MPI_TYPE_UB( PatternIn%SendDesc(Ipe)%type, ub, ierror )
     528           0 :             Stride_S = max(Stride_S,ub)
     529             :           ENDIF
     530     9449472 :           IF ( PatternIn%RecvDesc(Ipe)%type /= MPI_DATATYPE_NULL ) THEN
     531             :             CALL MPI_TYPE_UB( PatternIn%RecvDesc(Ipe)%type, ub, ierror )
     532           0 :             Stride_R = max(Stride_R,ub)
     533             :           ENDIF
     534             :         ENDDO
     535             : 
     536             : !
     537             : ! Determine the output data types
     538             : !
     539     9449472 :         DO Ipe=1, GroupSize
     540     9437184 :           IF ( PatternIn%SendDesc(ipe)%type /= MPI_DATATYPE_NULL ) THEN
     541             :             CALL MPI_TYPE_HVECTOR( Mult, 1, Stride_S, PatternIn%SendDesc(Ipe)%type,&
     542           0 :                                    PatternOut%SendDesc(Ipe)%type, Ierror )
     543           0 :             CALL MPI_TYPE_COMMIT( PatternOut%SendDesc(Ipe)%type, Ierror )
     544             :           ELSE
     545     9437184 :             PatternOut%SendDesc(ipe)%type = MPI_DATATYPE_NULL
     546             :           ENDIF
     547     9449472 :           IF ( PatternIn%RecvDesc(Ipe)%type /= MPI_DATATYPE_NULL ) THEN
     548             :             CALL MPI_TYPE_HVECTOR( Mult, 1, Stride_R, PatternIn%RecvDesc(Ipe)%type,&
     549           0 :                                    PatternOut%RecvDesc(Ipe)%type, Ierror )
     550           0 :             CALL MPI_TYPE_COMMIT( PatternOut%RecvDesc(Ipe)%type, Ierror )
     551             :           ELSE
     552     9437184 :             PatternOut%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
     553             :           ENDIF
     554             :         ENDDO
     555             : 
     556             : !
     557             : ! Determine the stride, which is the sum of all the blocksizes for all
     558             : ! the derived types (there are no 'holes').
     559             : !      
     560       12288 :         Stride_S = 0
     561       12288 :         Stride_R = 0       
     562     9449472 :         DO Ipe=1, GroupSize
     563     9474048 :           Stride_S = Stride_S + sum( PatternIn%SendDesc(ipe)%BlockSizes(:) )
     564     9486336 :           Stride_R = Stride_R + sum( PatternIn%RecvDesc(ipe)%BlockSizes(:) )
     565             :         ENDDO
     566             : 
     567     9449472 :         DO ipe=1, GroupSize
     568     9437184 :           Length = SIZE(PatternIn%SendDesc(ipe)%BlockSizes) 
     569    18886656 :           ALLOCATE( PatternOut%SendDesc(ipe)%Displacements(Length*Mult) )
     570     9449472 :           ALLOCATE( PatternOut%SendDesc(ipe)%BlockSizes(Length*Mult) )
     571             : #if defined( DEBUG_PARPATTERNCOPY )
     572             :           write(iulog,*) "Multiplicity", Mult
     573             :           write(iulog,*) "Old send blocksizes", PatternIn%SendDesc(ipe)%BlockSizes
     574             : #endif
     575     9474048 :           DO i=1, Length
     576       36864 :             Disp = PatternIn%SendDesc(ipe)%Displacements(i)
     577    10672128 :             DO j=1, Mult
     578           0 :               PatternOut%SendDesc(ipe)%BlockSizes(i+(j-1)*Length) =     &
     579     1198080 :                     PatternIn%SendDesc(ipe)%BlockSizes(i)
     580     1198080 :               PatternOut%SendDesc(ipe)%Displacements(i+(j-1)*Length) = Disp
     581     1234944 :               Disp = Disp + Stride_S
     582             :             ENDDO
     583             :           ENDDO
     584           0 :           PatternOut%SendDesc(ipe)%Nparcels  = &
     585     9437184 :             size (PatternOut%SendDesc(ipe)%Displacements)
     586           0 :           PatternOut%SendDesc(ipe)%Tot_Size = &
     587    10635264 :             sum  (PatternOut%SendDesc(ipe)%Blocksizes)
     588     9437184 :           Max_Nparcels = max (Max_Nparcels, PatternOut%SendDesc(ipe)%Nparcels)
     589             : #if defined( DEBUG_PARPATTERNCOPY )
     590             :           write(iulog,*) "Send blocksizes", PatternOut%SendDesc(ipe)%BlockSizes
     591             :           write(iulog,*) "Old recv blocksizes", PatternIn%RecvDesc(ipe)%BlockSizes
     592             : #endif
     593     9437184 :           Length = SIZE(PatternIn%RecvDesc(ipe)%BlockSizes) 
     594    18886656 :           ALLOCATE( PatternOut%RecvDesc(ipe)%Displacements(Length*Mult) )
     595     9449472 :           ALLOCATE( PatternOut%RecvDesc(ipe)%BlockSizes(Length*Mult) )
     596     9474048 :           DO i=1, Length
     597       36864 :             Disp = PatternIn%RecvDesc(ipe)%Displacements(i)
     598    10672128 :             DO j=1, Mult
     599           0 :               PatternOut%RecvDesc(ipe)%BlockSizes(i+(j-1)*Length) =     &
     600     1198080 :                     PatternIn%RecvDesc(ipe)%BlockSizes(i)
     601     1198080 :               PatternOut%RecvDesc(ipe)%Displacements(i+(j-1)*Length) = Disp
     602     1234944 :               Disp = Disp + Stride_R
     603             :             ENDDO
     604             :           ENDDO
     605           0 :           PatternOut%RecvDesc(ipe)%Nparcels  = &
     606     9437184 :             size (PatternOut%RecvDesc(ipe)%Displacements)
     607           0 :           PatternOut%RecvDesc(ipe)%Tot_Size = &
     608    10635264 :             sum  (PatternOut%RecvDesc(ipe)%Blocksizes)
     609     9449472 :           Max_Nparcels = max (Max_Nparcels, PatternOut%RecvDesc(ipe)%Nparcels)
     610             : #if defined( DEBUG_PARPATTERNCOPY )
     611             :           write(iulog,*) "Recv blocksizes", PatternOut%RecvDesc(ipe)%BlockSizes
     612             : #endif
     613             :         ENDDO
     614             : 
     615       12288 :         CALL get_partneroffset( InComm, PatternOut%SendDesc, PatternOut%RecvDesc )
     616             :       
     617             :       CPP_LEAVE_PROCEDURE( "PARPATTERNCOPY" )
     618       12288 :       RETURN
     619             : !EOC
     620             :       END SUBROUTINE ParPatternCopy
     621             : !-----------------------------------------------------------------------
     622             : 
     623             : !-----------------------------------------------------------------------
     624             : !BOP
     625             : ! !IROUTINE:   ParPatternGhost --- Create pattern for given ghosting
     626             : !
     627             : ! !INTERFACE:
     628           0 :       SUBROUTINE ParPatternGhost( InComm, Ghost, Pattern, mod_method, T )
     629             : !
     630             : ! !USES:
     631             :       USE decompmodule, ONLY : DecompGlobalToLocal, DecompLocalToGlobal
     632             :       USE ghostmodule, ONLY : GhostType, GhostInfo
     633             :       USE mod_comm, ONLY : get_partneroffset
     634             :       IMPLICIT NONE
     635             : 
     636             : ! !INPUT PARAMETERS:
      637             :       INTEGER,  INTENT( IN )               :: InComm  ! Communicator
      638             :       TYPE(GhostType),  INTENT( IN )       :: Ghost   ! Ghost region descriptor
     639             :       INTEGER,  INTENT( IN ), OPTIONAL     :: mod_method ! contiguous or derived type
     640             :       INTEGER, INTENT( IN ),  OPTIONAL     :: T       ! 
     641             : 
     642             : ! !OUTPUT PARAMETERS:
     643             :       TYPE(ParPatternType), INTENT( OUT )  :: Pattern ! Comm Pattern
     644             : !
     645             : ! !DESCRIPTION:
      646             : !     This routine constructs a communication pattern from the ghost
     647             : !     region definition.  That is, the resulting communication pattern
     648             : !     can be used in ParBegin/EndTransfer with the ghosted arrays as
     649             : !     inputs.  
     650             : !
     651             : ! !SYSTEM ROUTINES:
     652             : !    MPI_COMM_SIZE,  MPI_COMM_RANK, MPI_COMM_DUP
     653             : !    MPI_TYPE_INDEXED, MPI_TYPE_COMMIT (depending on method)
     654             : !
     655             : ! !REVISION HISTORY:
     656             : !   01.02.10   Sawyer     Creation
     657             : !   01.06.02   Sawyer     Renamed ParPatternGhost
     658             : !   02.06.27   Sawyer     Added data type "T" as optional argument
     659             : !   03.03.04   Sawyer     Set partneroffsets field
     660             : !   03.11.11   Mirin      Added optional argument mod_method
     661             : !
     662             : !EOP 
     663             : !-----------------------------------------------------------------------
     664             : !BOC
     665             : ! !LOCAL VARIABLES:
     666             :       INTEGER  i, j, ipe, pe, Iam, GroupSize, Num, Length, Ptr, Ierror
     667             :       INTEGER  Global, End, Local, GlobalSize, LocalSize, BorderSize
     668             :       INTEGER  DataType
     669           0 :       INTEGER, ALLOCATABLE :: InVector(:), OutVector(:)
     670           0 :       INTEGER, ALLOCATABLE :: LenInVector(:), LenOutVector(:)
     671             :       INTEGER              :: method
     672             : 
     673             :       CPP_ENTER_PROCEDURE( "PARPATTERNGHOST" )
     674             : 
     675           0 :       IF (present(T)) THEN
     676           0 :         DataType = T
     677             :       ELSE
     678           0 :         DataType = CPP_MPI_REAL8
     679             :       ENDIF
     680             : 
     681           0 :       IF (present(mod_method)) THEN
     682           0 :         method = mod_method
     683             :       ELSE
     684             :         method = 0     ! Default method - see mod_comm for description
     685             :       ENDIF
     686             : !
     687             : ! First request the needed ghost values from other processors.
     688             : !
     689           0 :       CALL MPI_COMM_DUP( InComm, Pattern%Comm, Ierror )
     690           0 :       CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
     691           0 :       CALL MPI_COMM_RANK( InComm, Iam, Ierror )
     692             : 
     693           0 :       Pattern%Iam  = Iam
     694           0 :       Pattern%Size = GroupSize
     695             : 
     696           0 :       ALLOCATE( Pattern%SendDesc( GroupSize ) )
     697           0 :       ALLOCATE( Pattern%RecvDesc( GroupSize ) )
     698             : 
     699           0 :       Pattern%SendDesc(:)%method = method
     700           0 :       Pattern%RecvDesc(:)%method = method
     701             : 
     702             : !
     703             : ! Temporary variables
     704             : !
     705           0 :       ALLOCATE( LenInVector( GroupSize ) )
     706           0 :       ALLOCATE( LenOutVector( GroupSize ) )
     707             : 
     708           0 :       CALL GhostInfo( Ghost,GroupSize,GlobalSize,LocalSize,BorderSize )
     709           0 :       ALLOCATE( InVector( 2*BorderSize ) )
     710           0 :       ALLOCATE( OutVector( 2*LocalSize ) )
     711             : 
     712             : !
     713             : ! A rather complicated loop to define the local ghost region.
     714             : ! The concept is the following:  go through all the points in the
     715             : ! border data structure.   It contains global indices of the points
     716             : ! which have to be copied over from neighboring PEs.  These indices
     717             : ! are collected into InVector for transmission to those PEs, in
     718             : ! effect informing them of the local PEs requirements.
     719             : !
     720             : ! A special case is supported:  if the ghost domain wraps around
     721             : ! onto the domain of the local PE!  This is very tricky, because
     722             : ! the index space in both Ghost%Border and Ghost%Local MUST be
     723             : ! unique for DecompGlobalToLocal to work.   Solution:  ghost 
     724             : ! points are marked with the negative value of the needed domain 
     725             : ! value in both Ghost%Border and Ghost%Local.  These are "snapped 
     726             : ! over" to the true global index with the ABS function, so that 
     727             : ! they can be subsequently found in the true local domain.
     728             : !
     729           0 :       j = 1
     730           0 :       DO ipe=1, GroupSize
     731           0 :         Num = SIZE(Ghost%Border%Head(ipe)%StartTags)
     732           0 :         Length = 0
     733           0 :         DO i = 1, Num
     734           0 :           Global = Ghost%Border%Head(ipe)%StartTags(i)
     735           0 :           IF ( Global /= 0 ) THEN
     736           0 :             Length = Length + 1
     737           0 :             End    = Ghost%Border%Head(ipe)%EndTags(i)
     738           0 :             InVector(j) = ABS(Global)
     739           0 :             InVector(j+1) = ABS(End)
     740           0 :             CALL DecompGlobalToLocal( Ghost%Local, Global, Local, Pe )
     741           0 :             OutVector(Length) = Local-1                ! Zero-based address
     742           0 :             OutVector(Length+Num) = End - Global+1     ! Parcel size
     743           0 :             j = j + 2
     744             :           ENDIF
     745             :         ENDDO
     746           0 :         LenInVector(ipe) = 2*Length
     747             : 
     748             : !
     749             : ! Set the receive buffer descriptor
     750             : !
     751             : #if defined(DEBUG_PARPATTERNGHOST)
     752             :         write(iulog,*) "Iam",Iam,"Pe",Ipe-1,"Lens",OutVector(Num+1:Num+Length), &
     753             :              "Displacements", OutVector(1:Length)
     754             : #endif
     755             : 
     756           0 :           IF ( Length > 0 .and. method > 0 ) THEN
     757           0 :             CALL MPI_TYPE_INDEXED( Length, OutVector(Num+1), OutVector,    &
     758           0 :                                    DataType, Ptr, Ierror )
     759           0 :             CALL MPI_TYPE_COMMIT( Ptr, Ierror )
     760           0 :             Pattern%RecvDesc(ipe)%type = Ptr
     761             :           ELSE
     762           0 :             Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
     763             :           ENDIF
     764             : 
     765           0 :           ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(Length) )
     766           0 :           ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(Length) )
     767           0 :           DO i=1, Length
     768           0 :             Pattern%RecvDesc(ipe)%Displacements(i) = OutVector(i)
     769           0 :             Pattern%RecvDesc(ipe)%BlockSizes(i)    = OutVector(Num+i)
     770             :           ENDDO            
     771           0 :           Pattern%RecvDesc(ipe)%Nparcels  = &
     772           0 :             size (Pattern%RecvDesc(ipe)%Displacements)
     773           0 :           Pattern%RecvDesc(ipe)%Tot_Size = &
     774           0 :             sum  (Pattern%RecvDesc(ipe)%Blocksizes)
     775           0 :           Max_Nparcels = max (Max_Nparcels, Pattern%RecvDesc(ipe)%Nparcels)
     776             : 
     777             :       ENDDO
     778             : 
     779             : !
     780             : ! Everybody exchanges the needed information
     781             : !
     782             : #if defined(DEBUG_PARPATTERNGHOST)
     783             :       write(iulog,*) "iam", iam, "In", LenInVector,                            &
     784             :                 InVector( 1:SUM(LenInVector) )
     785             : #endif
     786             :       CALL ParExchangeVectorInt( InComm, LenInVector, InVector,          &
     787           0 :                                      LenOutVector, OutVector )
     788             : #if defined(DEBUG_PARPATTERNGHOST)
     789             :       write(iulog,*) "iam", iam, "Out", LenOutVector,                          &
     790             :                 OutVector( 1:SUM(LenOutVector) )
     791             : #endif
     792             : 
     793             : !
     794             : ! Now everyone has the segments which need to be sent to the 
     795             : ! immediate neighbors.  Save these in PatternType.
     796             : !
     797           0 :       j = 1
     798           0 :       DO ipe = 1, GroupSize
     799           0 :         Num = LenOutVector(ipe) / 2
     800           0 :         DO i = 1, Num
     801           0 :           CALL DecompGlobalToLocal( Ghost%Local,OutVector(j),Local,pe )
     802           0 :           InVector(i) = Local-1
     803           0 :           InVector(i+Num) = OutVector(j+1) - OutVector(j) + 1
     804           0 :           j = j + 2
     805             :         ENDDO
     806             : #if defined(DEBUG_PARPATTERNGHOST)
     807             :         write(iulog,*) "Iam", Iam, "To", ipe-1, "InVector",                    &
     808             :               InVector(1:Num), "block size", InVector(Num+1:2*Num)
     809             : #endif
     810             : 
     811           0 :           IF ( Num > 0 .and. method > 0 ) THEN
     812           0 :             CALL MPI_TYPE_INDEXED( Num, InVector(Num+1), InVector,         &
     813           0 :                                    DataType, Ptr, Ierror )
     814           0 :             CALL MPI_TYPE_COMMIT( Ptr, Ierror )
     815           0 :             Pattern%SendDesc(ipe)%type = Ptr
     816             :           ELSE
     817           0 :             Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
     818             :           ENDIF
     819             : 
     820           0 :           ALLOCATE( Pattern%SendDesc(ipe)%Displacements(Num) )
     821           0 :           ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(Num) )
     822           0 :           DO i=1, Num
     823           0 :             Pattern%SendDesc(ipe)%Displacements(i) = InVector(i)
     824           0 :             Pattern%SendDesc(ipe)%BlockSizes(i)    = InVector(Num+i)
     825             :           ENDDO            
     826           0 :           Pattern%SendDesc(ipe)%Nparcels  = &
     827           0 :             size (Pattern%SendDesc(ipe)%Displacements)
     828           0 :           Pattern%SendDesc(ipe)%Tot_Size = &
     829           0 :             sum  (Pattern%SendDesc(ipe)%Blocksizes)
     830           0 :           Max_Nparcels = max (Max_Nparcels, Pattern%SendDesc(ipe)%Nparcels)
     831             : 
     832             :       ENDDO
     833             : 
     834           0 :       CALL get_partneroffset( InComm, Pattern%SendDesc, Pattern%RecvDesc )
     835             : 
     836             : !
     837             : ! Clean up the locally allocate variables
     838             : !
     839           0 :       DEALLOCATE( OutVector )
     840           0 :       DEALLOCATE( InVector )
     841           0 :       DEALLOCATE( LenOutVector )
     842           0 :       DEALLOCATE( LenInVector )
     843             : 
     844             :       CPP_LEAVE_PROCEDURE( "PARPATTERNGHOST" )
     845           0 :       RETURN
     846             : !EOC
     847           0 :       END SUBROUTINE ParPatternGhost
     848             : !-----------------------------------------------------------------------
     849             : 
     850             : !-----------------------------------------------------------------------
     851             : !BOP
     852             : ! !IROUTINE:   ParPatternDecompToDecomp --- Create pattern between decomps
     853             : !
     854             : ! !INTERFACE:
     855       21504 :       SUBROUTINE ParPatternDecompToDecomp( InComm, DA, DB, Pattern, mod_method, T )
     856             : !
     857             : ! !USES:
     858             :       USE decompmodule, ONLY : DecompType, DecompGlobalToLocal, DecompInfo
     859             :       USE mod_comm, ONLY : get_partneroffset
     860             :       IMPLICIT NONE
     861             : 
     862             : ! !INPUT PARAMETERS:
      863             :       INTEGER,  INTENT( IN )               :: InComm  ! Communicator
     864             :       TYPE(DecompType),  INTENT( IN )      :: DA      ! Source Decomp Desc
     865             :       TYPE(DecompType),  INTENT( IN )      :: DB      ! Target Decomp Desc
     866             :       INTEGER,  INTENT( IN ), OPTIONAL     :: mod_method ! contiguous or derived type
     867             :       INTEGER, INTENT( IN ),  OPTIONAL     :: T       ! 
     868             : 
     869             : ! !OUTPUT PARAMETERS:
     870             :       TYPE(ParPatternType), INTENT( OUT )  :: Pattern ! Comm Pattern
     871             : !
     872             : ! !DESCRIPTION:
      873             : !     This routine constructs a communication pattern for a 
     874             : !     transformation from one decomposition to another, i.e., a 
     875             : !     so-called "transpose". The resulting communication pattern 
     876             : !     can be used in ParBegin/EndTransfer with the decomposed 
     877             : !     arrays as inputs.  
     878             : !
     879             : ! !SYSTEM ROUTINES:
     880             : !    MPI_COMM_SIZE,  MPI_COMM_RANK, MPI_COMM_DUP
     881             : !    MPI_TYPE_INDEXED, MPI_TYPE_COMMIT (depending on method)
     882             : !
     883             : ! !REVISION HISTORY:
     884             : !   01.05.29   Sawyer     Creation from RedistributeCreate
     885             : !   01.07.13   Sawyer     Rewritten to minimize DecompGlobalToLocal
     886             : !   02.07.16   Sawyer     Added data type T
     887             : !   03.11.11   Mirin      Added optional argument mod_method
     888             : !   07.03.11   Mirin      Generalized to different sized decompositions
     889             : !   07.09.04   Dennis     Reduced amount of temporary memory usage
     890             : !
     891             : !EOP
     892             : !-----------------------------------------------------------------------
     893             : !BOC
     894             : ! !LOCAL VARIABLES:
     895             :       INTEGER I, J, Tag, Local, Pe, LenB, JB, Ipe, Num, Inc, Off
     896             :       INTEGER Ptr                                ! Pointer type
     897             :       INTEGER GroupSize, Iam, Ierror, DataType
     898             :       INTEGER OldPe, TotalPtsA, NpesA, TotalPtsB, NpesB
     899             :       INTEGER              :: method
     900             :       INTEGER              :: nCount,maxCount,ierr,sz
     901             :       INTEGER              :: lenBjmd,nNeigh,maxLenB,maxNeigh
     902             : #ifdef _SMEMORY
     903             :       TYPE (ParInfoType) :: Info
     904             : #endif
     905             : 
     906       21504 :       INTEGER, ALLOCATABLE :: Count(:)           ! # segments for each recv PE
     907       21504 :       INTEGER, ALLOCATABLE :: CountOut(:)        ! # segments for each send PE
     908             : 
     909       21504 :       INTEGER, ALLOCATABLE :: DisplacementsA(:)  ! Generic displacements
     910       21504 :       INTEGER, ALLOCATABLE :: BlockSizesA(:)     ! Generic block sizes
     911       21504 :       INTEGER, ALLOCATABLE :: LocalA(:)          ! Generic Local indices
     912             : 
     913       21504 :       INTEGER, ALLOCATABLE :: DisplacementsB(:)  ! Displacements for B
     914       21504 :       INTEGER, ALLOCATABLE :: BlockSizesB(:)     ! Block sizes for B
     915       21504 :       INTEGER, ALLOCATABLE :: LocalB(:)          ! Local indices for B
     916       21504 :       INTEGER, ALLOCATABLE :: PeB(:)             ! Processor element numbers
     917             : 
     918             :       CPP_ENTER_PROCEDURE( "PARPATTERNDECOMPTODECOMP" )
     919             : 
     920       21504 :       IF (present(T)) THEN
     921        9216 :         DataType = T
     922             :       ELSE
     923       12288 :         DataType = CPP_MPI_REAL8
     924             :       ENDIF
     925             : 
     926       21504 :       IF (present(mod_method)) THEN
     927       21504 :         method = mod_method
     928             :       ELSE
     929             :         method = 0     ! Default method - see mod_comm for description
     930             :       ENDIF
     931             : 
     932             : ! Assume this routine is called by processes [ 0,max(NpesA,NpesB) )
     933             : 
     934       21504 :       CALL DecompInfo( DA, NpesA, TotalPtsA )
     935       21504 :       CALL DecompInfo( DB, NpesB, TotalPtsB )
     936             : 
     937       21504 :       CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
     938       21504 :       CALL MPI_COMM_RANK( InComm, Iam, Ierror )
     939       21504 :       CALL MPI_COMM_DUP( InComm, Pattern%Comm, Ierror )
     940             : 
     941             : #ifdef _SMEMORY
     942             : ! Calculate info about the pattern 
     943       21504 :       call ParCalcInfo(InComm,DA,DB, Info)
     944       21504 :       TotalPtsA=Info%maxNumSeg
     945       21504 :       TotalPtsB=Info%maxNumSeg
     946             : #endif
     947             : 
     948       21504 :       Pattern%Size = GroupSize
     949       21504 :       Pattern%Iam  = Iam
     950             : !
     951             : ! Allocate the number of entries and list head arrays
     952             : !
     953             : 
     954             : !
     955             : ! Allocate the patterns
     956             : !
     957       64512 :       ALLOCATE( Pattern%SendDesc( NpesB ) )
     958    16536576 :       Pattern%SendDesc(:)%method = method
     959       21504 :       if (iam .ge. NpesA) then
     960           0 :          do ipe = 1, NpesB
     961           0 :             ALLOCATE( Pattern%SendDesc(ipe)%Displacements(1) )
     962           0 :             ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(1) )
     963           0 :             Pattern%SendDesc(ipe)%Tot_Size = -1
     964           0 :             Pattern%SendDesc(ipe)%Nparcels = -1
     965           0 :             Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
     966           0 :             Pattern%SendDesc(ipe)%Displacements(1) = -1
     967           0 :             Pattern%SendDesc(ipe)%Blocksizes(1) = -1
     968             :          enddo
     969             :       endif
     970             : 
     971       64512 :       ALLOCATE( Pattern%RecvDesc( NpesA ) )
     972    16536576 :       Pattern%RecvDesc(:)%method = method
     973       21504 :       if (iam .ge. NpesB) then
     974           0 :          do ipe = 1, NpesA
     975           0 :             ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(1) )
     976           0 :             ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(1) )
     977           0 :             Pattern%RecvDesc(ipe)%Tot_Size = -1
     978           0 :             Pattern%RecvDesc(ipe)%Nparcels = -1
     979           0 :             Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
     980           0 :             Pattern%RecvDesc(ipe)%Displacements(1) = -1
     981           0 :             Pattern%RecvDesc(ipe)%Blocksizes(1) = -1
     982             :          enddo
     983             :       endif
     984             : 
     985             : !
     986             : ! Local allocations
     987             : !
     988       64512 :       ALLOCATE( DisplacementsA( TotalPtsA ) )   ! Allocate for worst case
     989       43008 :       ALLOCATE( BlockSizesA( TotalPtsA ) )      ! Allocate for worst case
     990       43008 :       ALLOCATE( LocalA( TotalPtsA ) )           ! Allocate for worst case
     991             : 
     992       64512 :       ALLOCATE( DisplacementsB( TotalPtsB ) )   ! Allocate for worst case
     993       43008 :       ALLOCATE( BlockSizesB( TotalPtsB ) )      ! Allocate for worst case
     994       43008 :       ALLOCATE( LocalB( TotalPtsB ) )           ! Allocate for worst case
     995       43008 :       ALLOCATE( PeB( TotalPtsB ) )              ! Allocate for worst case
     996             : 
     997       64512 :       ALLOCATE( Count( GroupSize ) )
     998       43008 :       ALLOCATE( CountOut( GroupSize ) )
     999             : 
    1000       21504 :       JB        = 0
    1001    16536576 :       Count     = 0
    1002    22361088 :       LenB      = 0
    1003    22361088 :       LocalA      = 0   !  (needed for parexchangevector later)
    1004    22361088 :       BlocksizesA = 0   !  (needed for parexchangevector later)
    1005             : 
    1006       21504 :       Num    = 0
    1007       21504 :       Inc    = 0
    1008             : 
    1009       21504 :     if (iam .lt. NpesB) then
    1010             : 
    1011             : !
    1012             : ! Parse through all the tags in the local segment
    1013      588288 :       DO J = 1, SIZE( DB%Head(iam+1)%StartTags )
    1014      566784 :         OldPe     = -1         ! Set PE undefined
    1015    25139712 :         DO Tag=DB%Head(iam+1)%StartTags(J), DB%Head(iam+1)%EndTags(J)
    1016             : !
    1017             : ! Determine the index and PE of this entry on A. This might be inlined later
    1018             : !
    1019    24551424 :           CALL DecompGlobalToLocal( DA, Tag, Local, Pe )
    1020             : 
    1021             : !
    1022             : ! If ipe-1 is my id, then this is an entry ipe will receive from Pe
    1023             : !
    1024    24551424 :           IF ( Pe /= OldPe ) THEN
    1025     1022976 :             OldPe   = Pe
    1026     1022976 :             IF ( jb > 0 ) THEN
    1027     1006074 :               BlockSizesB(jb) = LenB
    1028     1006074 :               LenB = 0
    1029             :             ENDIF
    1030     1022976 :             jb = jb+1                     ! increment the segment index
    1031     1022976 :             DisplacementsB(jb) = Inc      ! Zero-based offset of local segment
    1032     1022976 :             LocalB(jb) = Local-1          ! The local index (zero-based)
    1033     1022976 :             PeB(jb) = Pe                  ! Note the ID of the sender
    1034     1022976 :             Count(Pe+1) = Count(Pe+1)+1 ! Increment counter of segments
    1035             :           ENDIF
    1036    24551424 :           LenB = LenB+1                   ! Good -- segment is getting longer
    1037    49669632 :           Inc = Inc+1                     ! Increment local index
    1038             :         ENDDO
    1039             :       ENDDO
    1040             : !
    1041             : ! Clean up
    1042             : !
    1043       21504 :       IF ( jb>0 ) BlockSizesB(jb) = LenB
    1044             : #if defined(DEBUG_PARPATTERNDECOMPTODECOMP)
    1045             :       write(iulog,*) iam, "BlockSizes", BlockSizesB(1:jb), DisplacementsB(1:jb), PeB(1:jb), Count
    1046             : #endif
    1047             : 
    1048             :       CPP_ASSERT_F90( JB .LE. TotalPtsB )
    1049             : !
    1050             : ! Now create the pattern from the displacements and block sizes
    1051             : !
    1052       21504 :       Inc = 0
    1053    16536576 :       DO ipe = 1, NpesA
    1054             : !
    1055             : ! Find the segments which are relevant for the sender ipe
    1056             : ! Make compact arrays BlockSizes and Displacements 
    1057             : !
    1058   802182144 :         DO j = 1, jb
    1059   802160640 :           IF ( PeB(j) == ipe-1 ) THEN
    1060     1022976 :             Inc = Inc + 1
    1061     1022976 :             BlockSizesA(Inc) = BlockSizesB(j)
    1062     1022976 :             DisplacementsA(Inc) = DisplacementsB(j)
    1063     1022976 :             LocalA(Inc)      = LocalB(j)
    1064             :           ENDIF
    1065             :         ENDDO
    1066             :       ENDDO
    1067             :       CPP_ASSERT_F90( Inc .LE. TotalPtsA )
    1068             : 
    1069             : !
    1070             : ! Create the receiver communication pattern
    1071             : !
    1072       21504 :       Off = 0
    1073    16536576 :       DO ipe = 1, NpesA
    1074    16515072 :         Num = Count(ipe)
    1075             :         if(Num >0) then 
    1076             : #if defined(DEBUG_PARPATTERNDECOMPTODECOMP)
    1077             :         write(iulog,*) "Receiver Iam", Iam, "Ipe", Ipe-1, "Num", Num,         &
    1078             :                  "Displacements", DisplacementsA(Off+1:Off+Num),        &
    1079             :                  "BlockSizes", BlockSizesA(Off+1:Off+Num)
    1080             : #endif
    1081             :         endif
    1082    16515072 :           IF ( Num > 0 .and. method > 0 ) THEN
    1083             : 
    1084           0 :             CALL MPI_TYPE_INDEXED( Num, BlockSizesA(Off+1),             &
    1085             :                                    DisplacementsA(Off+1),               &
    1086           0 :                                    DataType, Ptr, Ierror )
    1087           0 :             CALL MPI_TYPE_COMMIT( Ptr, Ierror )
    1088           0 :             Pattern%RecvDesc(ipe)%type = Ptr
    1089             :           ELSE
    1090    16515072 :             Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
    1091             :           ENDIF
    1092             : 
    1093    33186816 :           ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(Num) )
    1094    16671744 :           ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(Num) )
    1095    17538048 :           DO i=1, Num
    1096     1022976 :             Pattern%RecvDesc(ipe)%Displacements(i) = DisplacementsA(i+Off)
    1097    17538048 :             Pattern%RecvDesc(ipe)%BlockSizes(i)    = BlockSizesA(i+Off)
    1098             :           ENDDO
    1099           0 :           Pattern%RecvDesc(ipe)%Nparcels  = &
    1100    16515072 :             size (Pattern%RecvDesc(ipe)%Displacements)
    1101           0 :           Pattern%RecvDesc(ipe)%Tot_Size = &
    1102    17538048 :             sum  (Pattern%RecvDesc(ipe)%Blocksizes)
    1103    16515072 :           Max_Nparcels = max (Max_Nparcels, Pattern%RecvDesc(ipe)%Nparcels)
    1104             : 
    1105    16536576 :         Off = Off + Num
    1106             :       ENDDO
    1107             : 
    1108             :     endif !  (iam .lt. NpesB)
    1109             : 
    1110             : !
    1111             : ! Now communicate what the receiver is expecting from the sender
    1112             : !
    1113             :       CALL ParExchangeVectorInt( InComm, Count, LocalA,                 &
    1114       21504 :                                  CountOut, DisplacementsB  )
    1115             :       CALL ParExchangeVectorInt( InComm, Count, BlockSizesA,            &
    1116       21504 :                                  CountOut, BlockSizesB )
    1117             : 
    1118             : !
    1119             : ! Sender A: BlockSizes and Displacements can now be stored
    1120             : !
    1121             : 
    1122       21504 :     if (iam .lt. NpesA) then
    1123             : 
    1124       21504 :       Off = 0
    1125    16536576 :       DO ipe=1, NpesB
    1126    16515072 :         Num = CountOut(ipe)
    1127             :         if(Num>0) then 
    1128             : #if defined(DEBUG_PARPATTERNDECOMPTODECOMP)
    1129             :         write(iulog,*) "Sender Iam", Iam, "Ipe", Ipe-1, "Num", Num,           &
    1130             :                  "Displacements", DisplacementsB(Off+1:Off+Num),        &
    1131             :                  "BlockSizes", BlockSizesB(Off+1:Off+Num)
    1132             : #endif
    1133             :         endif
    1134    16515072 :           IF ( Num > 0 .and. method > 0 ) THEN
    1135           0 :             CALL MPI_TYPE_INDEXED( Num, BlockSizesB(Off+1),             &
    1136             :                                    DisplacementsB(Off+1),               &
    1137           0 :                                    DataType, Ptr, Ierror )
    1138           0 :             CALL MPI_TYPE_COMMIT( Ptr, Ierror )
    1139           0 :             Pattern%SendDesc(ipe)%type = Ptr
    1140             :           ELSE
    1141    16515072 :             Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
    1142             :           ENDIF
    1143             : 
    1144    33186816 :           ALLOCATE( Pattern%SendDesc(ipe)%Displacements(Num) )
    1145    16671744 :           ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(Num) )
    1146    17538048 :           DO i=1, Num
    1147     1022976 :             Pattern%SendDesc(ipe)%Displacements(i) = DisplacementsB(i+Off)
    1148    17538048 :             Pattern%SendDesc(ipe)%BlockSizes(i)    = BlockSizesB(i+Off)
    1149             :           ENDDO
    1150           0 :           Pattern%SendDesc(ipe)%Nparcels  =  &
    1151    16515072 :             size (Pattern%SendDesc(ipe)%Displacements)
    1152           0 :           Pattern%SendDesc(ipe)%Tot_Size = &
    1153    17538048 :             sum  (Pattern%SendDesc(ipe)%Blocksizes)
    1154    16515072 :           Max_Nparcels = max (Max_Nparcels, Pattern%SendDesc(ipe)%Nparcels)
    1155             : 
    1156    16536576 :         Off = Off + Num
    1157             :       ENDDO
    1158             : 
    1159             :     endif !  (iam .lt. NpesA)
    1160             : 
    1161       21504 :       CALL get_partneroffset( InComm, Pattern%SendDesc, Pattern%RecvDesc )
    1162             :       
    1163       21504 :       DEALLOCATE( CountOut )
    1164       21504 :       DEALLOCATE( Count )
    1165             : 
    1166       21504 :       DEALLOCATE( PeB )
    1167       21504 :       DEALLOCATE( LocalB )
    1168       21504 :       DEALLOCATE( BlockSizesB )
    1169       21504 :       DEALLOCATE( DisplacementsB )
    1170             : 
    1171       21504 :       DEALLOCATE( LocalA )
    1172       21504 :       DEALLOCATE( BlockSizesA )
    1173       21504 :       DEALLOCATE( DisplacementsA )
    1174             : 
    1175             :       CPP_LEAVE_PROCEDURE( "PARPATTERNDECOMPTODECOMP" )
    1176       21504 :       RETURN
    1177             : !EOC
    1178       21504 :       END SUBROUTINE ParPatternDecompToDecomp
    1179             : !-----------------------------------------------------------------------
    1180             : 
    1181             : 
    1182             : !-----------------------------------------------------------------------
    1183             : !BOP
    1184             : ! !IROUTINE:   ParPatternDecompToGhost --- Create pattern decomp to ghost
    1185             : !
    1186             : ! !INTERFACE:
    1187        7680 :       SUBROUTINE ParPatternDecompToGhost( InComm, DA, GB, Pattern, mod_method, T )
    1188             : !
    1189             : ! !USES:
    1190             :       USE decompmodule, ONLY : DecompType, DecompGlobalToLocal,         &
    1191             :                                DecompInfo
    1192             :       USE ghostmodule, ONLY : GhostType, GhostInfo
    1193             :       USE mod_comm, ONLY : get_partneroffset
    1194             :       IMPLICIT NONE
    1195             : 
    1196             : ! !INPUT PARAMETERS:
    1197             :       INTEGER,  INTENT( IN )               :: InComm  ! # of PEs
    1198             :       TYPE(DecompType),  INTENT( IN )      :: DA      ! Source Ghost Desc
    1199             :       TYPE(GhostType),  INTENT( IN )       :: GB      ! Target Ghost Desc
    1200             :       INTEGER,  INTENT( IN ), OPTIONAL     :: mod_method ! contiguous or derived type
    1201             :       INTEGER, INTENT( IN ),  OPTIONAL     :: T       !
    1202             : 
    1203             : ! !OUTPUT PARAMETERS:
    1204             :       TYPE(ParPatternType), INTENT( OUT )  :: Pattern ! Comm Pattern
    1205             : !
    1206             : ! !DESCRIPTION:
    1207             : !     This routine constructs a communication pattern for a transformation
    1208             : !     from decomposition to a ghosted decomposition, i.e., a so-called 
    1209             : !     "transpose".  The resulting communication pattern can be used in 
    1210             : !     ParBegin/EndTransfer with the decomposed arrays as inputs.  
    1211             : !
    1212             : ! !SYSTEM ROUTINES:
    1213             : !    MPI_COMM_SIZE,  MPI_COMM_RANK, MPI_COMM_DUP
    1214             : !    MPI_TYPE_INDEXED, MPI_TYPE_COMMIT (depending on method)
    1215             : !
    1216             : ! !REVISION HISTORY:
    1217             : !   01.07.12   Sawyer     Creation from ParPatternDecompToDecomp
    1218             : !   02.03.20   Sawyer     Bug fix: added OldLocal, increment Off
    1219             : !   02.07.16   Sawyer     Added data type T
    1220             : !   03.11.11   Mirin      Added optional argument mod_method
    1221             : !   07.03.11   Mirin      Generalized to different sized decompositions
    1222             : !   07.09.04   Dennis     Reduced amount of temporary memory usage
    1223             : !
    1224             : !EOP
    1225             : !-----------------------------------------------------------------------
    1226             : !BOC
    1227             : ! !LOCAL VARIABLES:
    1228             :       INTEGER I, J, Tag, Local, Pe, LenB, JB, Ipe, Num, Inc, Off
    1229             :       INTEGER Ptr                                ! Pointer type
    1230             :       INTEGER GroupSize, Iam, Ierror
    1231             :       INTEGER OldPe, OldLocal, TotalPtsA, NpesA
    1232             :       INTEGER GlobalSizeB, LocalSizeB, BorderSizeB, NpesB
    1233             :       INTEGER DataType
    1234             :       INTEGER              :: method
    1235             :       INTEGER              :: nCount, maxCount, ierr
    1236             : #ifdef _SMEMORY
    1237             :       TYPE (ParInfoType) :: Info
    1238             : #endif
    1239             : 
    1240        7680 :       INTEGER, ALLOCATABLE :: Count(:)           ! # segments for each recv PE
    1241        7680 :       INTEGER, ALLOCATABLE :: CountOut(:)        ! # segments for each send PE
    1242             : 
    1243        7680 :       INTEGER, ALLOCATABLE :: DisplacementsA(:)  ! Generic displacements
    1244        7680 :       INTEGER, ALLOCATABLE :: BlockSizesA(:)     ! Generic block sizes
    1245        7680 :       INTEGER, ALLOCATABLE :: LocalA(:)          ! Generic Local indices
    1246             : 
    1247        7680 :       INTEGER, ALLOCATABLE :: DisplacementsB(:)  ! Displacements for B
    1248        7680 :       INTEGER, ALLOCATABLE :: BlockSizesB(:)     ! Block sizes for B
    1249        7680 :       INTEGER, ALLOCATABLE :: LocalB(:)          ! Local indices for B
    1250        7680 :       INTEGER, ALLOCATABLE :: PeB(:)             ! Processor element numbers
    1251             : 
    1252             :       CPP_ENTER_PROCEDURE( "PARPATTERNDECOMPTOGHOST" )
    1253             : 
    1254        7680 :       IF (present(T)) THEN
    1255           0 :         DataType = T
    1256             :       ELSE
    1257        7680 :         DataType = CPP_MPI_REAL8
    1258             :       ENDIF
    1259             : 
    1260        7680 :       IF (present(mod_method)) THEN
    1261        7680 :         method = mod_method
    1262             :       ELSE
    1263             :         method = 0     ! Default method - see mod_comm for description
    1264             :       ENDIF
    1265             : 
    1266             : ! Assume this routine is called by processes [ 0,max(NpesA,NpesB) )
    1267             : 
    1268        7680 :       CALL DecompInfo( DA, NpesA, TotalPtsA )
    1269        7680 :       CALL GhostInfo( GB, NpesB, GlobalSizeB, LocalSizeB, BorderSizeB )
    1270             : 
    1271        7680 :       CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
    1272        7680 :       CALL MPI_COMM_RANK( InComm, Iam, Ierror )
    1273        7680 :       CALL MPI_COMM_DUP( InComm, Pattern%Comm, Ierror )
    1274             : 
    1275             : #ifdef _SMEMORY
    1276             : ! Calculate info about the pattern 
    1277        7680 :       call ParCalcInfo(InComm,DA,GB, Info)
    1278        7680 :       TotalPtsA=Info%maxNumSeg
    1279        7680 :       GlobalSizeB=Info%maxNumSeg
    1280             : #endif
    1281             : 
    1282        7680 :       Pattern%Size = GroupSize
    1283        7680 :       Pattern%Iam  = Iam
    1284             : !
    1285             : ! Allocate the number of entries and list head arrays
    1286             : !
    1287             : 
    1288             : !
    1289             : ! Allocate the patterns
    1290             : !
    1291       23040 :       ALLOCATE( Pattern%SendDesc( NpesB ) )
    1292     5905920 :       Pattern%SendDesc(:)%method = method
    1293        7680 :       if (iam .ge. NpesA) then
    1294           0 :          do ipe = 1, NpesB
    1295           0 :             ALLOCATE( Pattern%SendDesc(ipe)%Displacements(1) )
    1296           0 :             ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(1) )
    1297           0 :             Pattern%SendDesc(ipe)%Tot_Size = -1
    1298           0 :             Pattern%SendDesc(ipe)%Nparcels = -1
    1299           0 :             Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
    1300           0 :             Pattern%SendDesc(ipe)%Displacements(1) = -1
    1301           0 :             Pattern%SendDesc(ipe)%Blocksizes(1) = -1
    1302             :          enddo
    1303             :       endif
    1304             : 
    1305       23040 :       ALLOCATE( Pattern%RecvDesc( NpesA ) )
    1306     5905920 :       Pattern%RecvDesc(:)%method = method
    1307        7680 :       if (iam .ge. NpesB) then
    1308           0 :          do ipe = 1, NpesA
    1309           0 :             ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(1) )
    1310           0 :             ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(1) )
    1311           0 :             Pattern%RecvDesc(ipe)%Tot_Size = -1
    1312           0 :             Pattern%RecvDesc(ipe)%Nparcels = -1
    1313           0 :             Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
    1314           0 :             Pattern%RecvDesc(ipe)%Displacements(1) = -1
    1315           0 :             Pattern%RecvDesc(ipe)%Blocksizes(1) = -1
    1316             :          enddo
    1317             :       endif
    1318             : 
    1319             : !
    1320             : ! Local allocations
    1321             : !
    1322       23040 :       ALLOCATE( DisplacementsA( TotalPtsA ) )   ! Allocate for worst case
    1323       15360 :       ALLOCATE( BlockSizesA( TotalPtsA ) )      ! Allocate for worst case
    1324       15360 :       ALLOCATE( LocalA( TotalPtsA ) )           ! Allocate for worst case
    1325             : 
    1326       23040 :       ALLOCATE( DisplacementsB( GlobalSizeB ) ) ! Allocate for worst case
    1327       15360 :       ALLOCATE( BlockSizesB( GlobalSizeB ) )    ! Allocate for worst case
    1328       15360 :       ALLOCATE( LocalB( GlobalSizeB ) )         ! Allocate for worst case
    1329       15360 :       ALLOCATE( PeB( GlobalSizeB ) )            ! Allocate for worst case
    1330             : 
    1331       23040 :       ALLOCATE( Count( GroupSize ) )
    1332       15360 :       ALLOCATE( CountOut( GroupSize ) )
    1333             : 
    1334        7680 :       JB        = 0
    1335     5905920 :       Count     = 0
    1336     1943040 :       LenB      = 0
    1337     1943040 :       LocalA      = 0   !  (needed for parexchangevector later)
    1338     1943040 :       BlocksizesA = 0   !  (needed for parexchangevector later)
    1339             : 
    1340        7680 :       Num    = 0
    1341        7680 :       Inc    = 0
    1342             : 
    1343        7680 :     if (iam .lt. NpesB) then
    1344             : 
    1345             : !
    1346             : ! Parse through all the tags in the local segment
    1347      361984 :       DO J = 1, SIZE( GB%Local%Head(iam+1)%StartTags )
    1348      354304 :         OldPe     = -1         ! Set PE undefined
    1349      354304 :         OldLocal  =  0         ! Set local index undefined
    1350    41936896 :         DO Tag=GB%Local%Head(iam+1)%StartTags(J),                         &
    1351      361984 :                 GB%Local%Head(iam+1)%EndTags(J)
    1352    41582592 :           IF ( Tag > 0 ) THEN        ! Active point
    1353             : !
    1354             : ! Determine the index and PE of this entry on A. This might be inlined later
    1355             : !
    1356    41250816 :             CALL DecompGlobalToLocal( DA, Tag, Local, Pe )
    1357             : 
    1358             : !
    1359             : ! If ipe-1 is my id, then this is an entry ipe will receive from Pe
    1360             : !
    1361    41250816 :             IF ( Pe /= OldPe .OR. Local /= OldLocal+1 ) THEN
    1362     1718784 :               IF ( jb > 0 ) THEN
    1363     1711104 :                 BlockSizesB(jb) = LenB
    1364     1711104 :                 LenB = 0
    1365             :               ENDIF
    1366     1718784 :               jb = jb+1                     ! increment the segment index
    1367     1718784 :               DisplacementsB(jb) = Inc      ! Zero-based offset of local segment
    1368     1718784 :               LocalB(jb) = Local-1          ! Local indices (zero-based)
    1369     1718784 :               PeB(jb) = Pe                  ! Note the ID of the sender
    1370     1718784 :               Count(Pe+1) = Count(Pe+1)+1 ! Increment counter of segments
    1371             :             ENDIF
    1372    41250816 :             OldPe   = Pe                    ! Update PE
    1373    41250816 :             OldLocal= Local                 ! Update local index
    1374    41250816 :             LenB = LenB+1                   ! Good -- segment is getting longer
    1375             :           ENDIF
    1376    41936896 :           Inc = Inc+1                     ! Increment local index
    1377             :         ENDDO
    1378             :       ENDDO
    1379             : !
    1380             : ! Clean up
    1381             : !
    1382        7680 :       IF ( jb>0 ) BlockSizesB(jb) = LenB
    1383             : 
    1384             :       CPP_ASSERT_F90( JB .LE. GlobalSize )
    1385             : !
    1386             : ! Now create the pattern from the displacements and block sizes
    1387             : !
    1388        7680 :       Inc = 0
    1389     5905920 :       DO ipe = 1, NpesA
    1390             : !
    1391             : ! Find the segments which are relevant for the sender ipe
    1392             : ! Make compact arrays BlockSizes and Displacements 
    1393             : !
    1394  1325932032 :         DO j = 1, jb
    1395  1325924352 :           IF ( PeB(j) == ipe-1 ) THEN
    1396     1718784 :             Inc = Inc + 1
    1397     1718784 :             BlockSizesA(Inc) = BlockSizesB(j)
    1398     1718784 :             DisplacementsA(Inc) = DisplacementsB(j)
    1399     1718784 :             LocalA(Inc)      = LocalB(j)
    1400             :           ENDIF
    1401             :         ENDDO
    1402             :       ENDDO
    1403             : 
    1404             :       CPP_ASSERT_F90( Inc .LE. TotalPtsA )
    1405             : 
    1406        7680 :       Off = 0
    1407     5905920 :       DO ipe = 1, NpesA
    1408     5898240 :         Num = Count(ipe)
    1409             : #if defined( DEBUG_PARPATTERNDECOMPTOGHOST )
    1410             :         write(iulog,*) "Receiver Iam", Iam, "Ipe", Ipe-1, "Num", Num, &
    1411             :                  "Displacements", DisplacementsA(Off+1:Off+Num), &
    1412             :                  "BlockSizes", BlockSizesA(Off+1:Off+Num)
    1413             : #endif
    1414             : 
    1415             : !
    1416             : ! Create the receiver communication pattern
    1417             : !
    1418     5898240 :           IF ( Num > 0 .and. method > 0 ) THEN
    1419           0 :             CALL MPI_TYPE_INDEXED( Num, BlockSizesA(Off+1),        &
    1420           0 :                    DisplacementsA(Off+1), DataType, Ptr, Ierror )
    1421           0 :             CALL MPI_TYPE_COMMIT( Ptr, Ierror )
    1422           0 :             Pattern%RecvDesc(ipe)%type = Ptr
    1423             :           ELSE
    1424     5898240 :             Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
    1425             :           ENDIF
    1426             : 
    1427    11997504 :           ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(Num) )
    1428     6099264 :           ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(Num) )
    1429     7617024 :           DO i=1, Num
    1430     1718784 :             Pattern%RecvDesc(ipe)%Displacements(i) = DisplacementsA(i+Off)
    1431     7617024 :             Pattern%RecvDesc(ipe)%BlockSizes(i)    = BlockSizesA(i+Off)
    1432             :           ENDDO
    1433           0 :           Pattern%RecvDesc(ipe)%Nparcels  = &
    1434     5898240 :             size (Pattern%RecvDesc(ipe)%Displacements)
    1435           0 :           Pattern%RecvDesc(ipe)%Tot_Size = &
    1436     7617024 :             sum  (Pattern%RecvDesc(ipe)%Blocksizes)
    1437     5898240 :           Max_Nparcels = max (Max_Nparcels, Pattern%RecvDesc(ipe)%Nparcels)
    1438             : 
    1439     5905920 :         Off = Off + Num
    1440             :       ENDDO
    1441             : 
    1442             :     endif !  (iam .lt. NpesB)
    1443             : 
    1444             : !
    1445             : ! Now communicate what the receiver is expecting to the sender
    1446             : !
    1447             :       CALL ParExchangeVectorInt( InComm, Count, LocalA,                 &
    1448        7680 :                                  CountOut, DisplacementsB  )
    1449             :       CALL ParExchangeVectorInt( InComm, Count, BlockSizesA,            &
    1450        7680 :                                  CountOut, BlockSizesB )
    1451             : 
    1452             : !
    1453             : ! Sender A: BlockSizes and Displacements can now be stored
    1454             : !
    1455             : 
    1456        7680 :     if (iam .lt. NpesA) then
    1457             : 
    1458        7680 :       Off = 0
    1459     5905920 :       DO ipe=1, NpesB
    1460     5898240 :         Num = CountOut(ipe)
    1461             : #if defined( DEBUG_PARPATTERNDECOMPTOGHOST )
    1462             :         write(iulog,*) "Sender Iam", Iam, "Ipe", Ipe-1, "Num", Num,           &
    1463             :                  "Displacements", DisplacementsB(Off+1:Off+Num),        &
    1464             :                  "BlockSizes", BlockSizesB(Off+1:Off+Num)
    1465             : #endif
    1466             : 
    1467     5898240 :           IF ( Num > 0 .and. method > 0 ) THEN
    1468           0 :             CALL MPI_TYPE_INDEXED( Num, BlockSizesB(Off+1),          &
    1469           0 :                     DisplacementsB(Off+1), DataType, Ptr, Ierror )
    1470           0 :             CALL MPI_TYPE_COMMIT( Ptr, Ierror )
    1471           0 :             Pattern%SendDesc(ipe)%type = Ptr
    1472             :           ELSE
    1473     5898240 :             Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
    1474             :           ENDIF
    1475             : 
    1476    11997504 :           ALLOCATE( Pattern%SendDesc(ipe)%Displacements(Num) )
    1477     6099264 :           ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(Num) )
    1478     7617024 :           DO i=1, Num
    1479     1718784 :             Pattern%SendDesc(ipe)%Displacements(i) = DisplacementsB(i+Off)
    1480     7617024 :             Pattern%SendDesc(ipe)%BlockSizes(i)    = BlockSizesB(i+Off)
    1481             :           ENDDO
    1482           0 :           Pattern%SendDesc(ipe)%Nparcels  = &
    1483     5898240 :             size (Pattern%SendDesc(ipe)%Displacements)
    1484           0 :           Pattern%SendDesc(ipe)%Tot_Size = &
    1485     7617024 :             sum  (Pattern%SendDesc(ipe)%Blocksizes)
    1486     5898240 :           Max_Nparcels = max (Max_Nparcels, Pattern%SendDesc(ipe)%Nparcels)
    1487             : 
    1488     5905920 :         Off = Off + Num
    1489             :       ENDDO
    1490             : 
    1491             :     endif !  (iam .lt. NpesA)
    1492             : 
    1493        7680 :       CALL get_partneroffset( InComm, Pattern%SendDesc, Pattern%RecvDesc )
    1494             :       
    1495        7680 :       DEALLOCATE( CountOut )
    1496        7680 :       DEALLOCATE( Count )
    1497             : 
    1498        7680 :       DEALLOCATE( PeB )
    1499        7680 :       DEALLOCATE( LocalB )
    1500        7680 :       DEALLOCATE( BlockSizesB )
    1501        7680 :       DEALLOCATE( DisplacementsB )
    1502             : 
    1503        7680 :       DEALLOCATE( LocalA )
    1504        7680 :       DEALLOCATE( BlockSizesA )
    1505        7680 :       DEALLOCATE( DisplacementsA )
    1506             : 
    1507             :       CPP_LEAVE_PROCEDURE( "PARPATTERNDECOMPTOGHOST" )
    1508        7680 :       RETURN
    1509             : !EOC
    1510        7680 :       END SUBROUTINE ParPatternDecompToGhost
    1511             : !-----------------------------------------------------------------------
    1512             : 
    1513             : 
    1514             : !-----------------------------------------------------------------------
    1515             : !BOP
    1516             : ! !IROUTINE:   ParPatternGhostToDecomp --- Create pattern between decomps
    1517             : !
    1518             : ! !INTERFACE:
    1519        6144 :       SUBROUTINE ParPatternGhostToDecomp( InComm, GA, DB, Pattern, mod_method, T )
    1520             : !
    1521             : ! !USES:
    1522             :       USE decompmodule, ONLY : DecompType, DecompGlobalToLocal, DecompInfo
    1523             :       USE ghostmodule, ONLY : GhostType, GhostInfo
    1524             :       USE mod_comm, ONLY : get_partneroffset
    1525             :       IMPLICIT NONE
    1526             : 
    1527             : ! !INPUT PARAMETERS:
    1528             :       INTEGER,  INTENT( IN )               :: InComm  ! # of PEs
    1529             :       TYPE(GhostType),   INTENT( IN )      :: GA      ! Source Decomp Desc
    1530             :       TYPE(DecompType),  INTENT( IN )      :: DB      ! Target Decomp Desc
    1531             :       INTEGER,  INTENT( IN ), OPTIONAL     :: mod_method ! contiguous or derived type
    1532             :       INTEGER, INTENT( IN ),  OPTIONAL     :: T       !
    1533             : ! !OUTPUT PARAMETERS:
    1534             :       TYPE(ParPatternType), INTENT( OUT )  :: Pattern ! Comm Pattern
    1535             : !
    1536             : ! !DESCRIPTION:
    1537             : !     This routine contructs a communication pattern for a 
    1538             : !     transformation from one ghosted decomposition to partitioned
    1539             : !     one, i.e., a so-called "transpose". The resulting communication 
    1540             : !     pattern can be used in ParBegin/EndTransfer with the decomposed 
    1541             : !     arrays as inputs.  
    1542             : !
    1543             : ! !SYSTEM ROUTINES:
    1544             : !    MPI_COMM_SIZE,  MPI_COMM_RANK, MPI_COMM_DUP
    1545             : !    MPI_TYPE_INDEXED, MPI_TYPE_COMMIT (depending on method)
    1546             : !
    1547             : ! !REVISION HISTORY:
    1548             : !   02.01.10   Sawyer     Creation from DecompToDecomp
    1549             : !   02.07.16   Sawyer     Added data type T
    1550             : !   03.11.11   Mirin      Added optional argument mod_method
    1551             : !   07.03.11   Mirin      Generalized to different sized decompositions
    1552             : !   07.09.04   Dennis     Reduced amount of temporary memory usage
    1553             : !
    1554             : !EOP
    1555             : !-----------------------------------------------------------------------
    1556             : !BOC
    1557             : ! !LOCAL VARIABLES:
    1558             :       INTEGER I, J, Tag, Local, Pe, Len, JA, Ipe, Num, Inc, Off
    1559             :       INTEGER NpesA, GlobalSizeA, LocalSizeA, BorderSizeA
    1560             :       INTEGER OldPe, OldLocal, TotalPtsB, NpesB
    1561             :       INTEGER GroupSize, Iam, Ierror
    1562             :       INTEGER Ptr                                ! Pointer type
    1563             :       INTEGER  DataType
    1564             :       INTEGER              :: method
    1565             :       INTEGER              :: nCount, maxCount, ierr
    1566             : #ifdef _SMEMORY
    1567             :       TYPE (ParInfoType) :: Info
    1568             : #endif
    1569             : 
    1570        6144 :       INTEGER, ALLOCATABLE :: Count(:)           ! # segments for each recv PE
    1571        6144 :       INTEGER, ALLOCATABLE :: CountOut(:)        ! # segments for each send PE
    1572             : 
    1573        6144 :       INTEGER, ALLOCATABLE :: DisplacementsA(:)  ! Generic displacements
    1574        6144 :       INTEGER, ALLOCATABLE :: BlockSizesA(:)     ! Generic block sizes
    1575        6144 :       INTEGER, ALLOCATABLE :: GlobalA(:)          ! Generic Local indices
    1576        6144 :       INTEGER, ALLOCATABLE :: PeA(:)             ! Processor element numbers
    1577             : 
    1578        6144 :       INTEGER, ALLOCATABLE :: DisplacementsB(:)  ! Displacements for B
    1579        6144 :       INTEGER, ALLOCATABLE :: BlockSizesB(:)     ! Block sizes for B
    1580        6144 :       INTEGER, ALLOCATABLE :: GlobalB(:)         ! Global indices for B
    1581             : 
    1582             :       CPP_ENTER_PROCEDURE( "PARPATTERNGHOSTTODECOMP" )
    1583             : 
    1584        6144 :       IF (present(T)) THEN
    1585           0 :         DataType = T
    1586             :       ELSE
    1587        6144 :         DataType = CPP_MPI_REAL8
    1588             :       ENDIF
    1589             : 
    1590        6144 :       IF (present(mod_method)) THEN
    1591        6144 :         method = mod_method
    1592             :       ELSE
    1593             :         method = 0     ! Default method - see mod_comm for description
    1594             :       ENDIF
    1595             : 
    1596             : ! Assume this routine is called by processes [ 0,max(NpesA,NpesB) )
    1597             : 
    1598        6144 :       CALL GhostInfo( GA, NpesA, GlobalSizeA, LocalSizeA, BorderSizeA )
    1599        6144 :       CALL DecompInfo( DB, NpesB, TotalPtsB )
    1600             : 
    1601        6144 :       CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
    1602        6144 :       CALL MPI_COMM_RANK( InComm, Iam, Ierror )
    1603        6144 :       CALL MPI_COMM_DUP( InComm, Pattern%Comm, Ierror )
    1604             : 
    1605             : #ifdef _SMEMORY
    1606             : ! Calculate info about the pattern 
    1607        6144 :       call ParCalcInfo(InComm,GA,DB, Info)
    1608        6144 :       GlobalSizeA=Info%maxNumSeg
    1609        6144 :       TotalPtsB=Info%maxNumSeg
    1610             : #endif
    1611             : 
    1612        6144 :       Pattern%Size = GroupSize
    1613        6144 :       Pattern%Iam  = Iam
    1614             : !
    1615             : ! Allocate the number of entries and list head arrays
    1616             : !
    1617             : 
    1618             : !
    1619             : ! Allocate the patterns
    1620             : !
    1621       18432 :       ALLOCATE( Pattern%SendDesc( NpesB ) )
    1622     4724736 :       Pattern%SendDesc(:)%method = method
    1623        6144 :       if (iam .ge. NpesA) then
    1624           0 :          do ipe = 1, NpesB
    1625           0 :             ALLOCATE( Pattern%SendDesc(ipe)%Displacements(1) )
    1626           0 :             ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(1) )
    1627           0 :             Pattern%SendDesc(ipe)%Tot_Size = -1
    1628           0 :             Pattern%SendDesc(ipe)%Nparcels = -1
    1629           0 :             Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
    1630           0 :             Pattern%SendDesc(ipe)%Displacements(1) = -1
    1631           0 :             Pattern%SendDesc(ipe)%Blocksizes(1) = -1
    1632             :          enddo
    1633             :       endif
    1634             : 
    1635       18432 :       ALLOCATE( Pattern%RecvDesc( NpesA ) )
    1636     4724736 :       Pattern%RecvDesc(:)%method = method
    1637        6144 :       if (iam .ge. NpesB) then
    1638           0 :          do ipe = 1, NpesA
    1639           0 :             ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(1) )
    1640           0 :             ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(1) )
    1641           0 :             Pattern%RecvDesc(ipe)%Tot_Size = -1
    1642           0 :             Pattern%RecvDesc(ipe)%Nparcels = -1
    1643           0 :             Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
    1644           0 :             Pattern%RecvDesc(ipe)%Displacements(1) = -1
    1645           0 :             Pattern%RecvDesc(ipe)%Blocksizes(1) = -1
    1646             :          enddo
    1647             :       endif
    1648             : 
    1649             : !
    1650             : ! Local allocations
    1651             : !
    1652       18432 :       ALLOCATE( DisplacementsA( GlobalSizeA ) )   ! Allocate for worst case
    1653       12288 :       ALLOCATE( BlockSizesA( GlobalSizeA ) )      ! Allocate for worst case
    1654       12288 :       ALLOCATE( GlobalA( GlobalSizeA ) )          ! Allocate for worst case
    1655       12288 :       ALLOCATE( PeA( GlobalSizeA ) )              ! Allocate for worst case
    1656             : 
    1657       18432 :       ALLOCATE( DisplacementsB( TotalPtsB ) )     ! Allocate for worst case
    1658       12288 :       ALLOCATE( BlockSizesB( TotalPtsB ) )        ! Allocate for worst case
    1659       12288 :       ALLOCATE( GlobalB( TotalPtsB ) )            ! Allocate for worst case
    1660             : 
    1661       18432 :       ALLOCATE( Count( GroupSize ) )
    1662       12288 :       ALLOCATE( CountOut( GroupSize ) )
    1663             : 
    1664        6144 :       JA     = 0
    1665     4724736 :       Count  = 0
    1666      669696 :       Len    = 0
    1667      669696 :       GlobalB     = 0   !  (needed for parexchangevector later)
    1668      669696 :       BlockSizesB = 0   !  (needed for parexchangevector later)
    1669             : 
    1670        6144 :       Num    = 0
    1671        6144 :       Inc    = 0
    1672             : 
    1673        6144 :     if (iam .lt. NpesB) then
    1674             : 
    1675             : !
    1676             : ! Parse through all the tags in the local segment
    1677      600576 :       DO J = 1, SIZE( DB%Head(iam+1)%StartTags )
    1678      594432 :         OldPe     = -1         ! Set PE undefined
    1679      594432 :         OldLocal  = 0          ! Set index value undefined
    1680    14866944 :         DO Tag=DB%Head(iam+1)%StartTags(J), DB%Head(iam+1)%EndTags(J)
    1681             : !
    1682             : ! Determine the index and PE of this entry on A. This might be inlined later
    1683             : !
    1684    14266368 :           CALL DecompGlobalToLocal( GA%Decomp, Tag, Local, Pe )
    1685             : 
    1686             : !
    1687             : ! If ipe-1 is my id, then this is an entry ipe will receive from Pe
    1688             : !
    1689    14266368 :           IF ( Pe /= OldPe  .OR. Local /= OldLocal+1 ) THEN
    1690      594432 :             IF ( ja > 0 ) THEN
    1691      588288 :               BlockSizesA(ja) = Len
    1692      588288 :               Len = 0
    1693             :             ENDIF
    1694      594432 :             ja = ja+1                     ! increment the segment index
    1695      594432 :             DisplacementsA(ja) = Inc      ! Zero-based offset of local segment
    1696      594432 :             GlobalA(ja) = Tag             ! The global tag of the desired datum
    1697      594432 :             PeA(ja) = Pe                  ! Note the ID of the sender
    1698      594432 :             Count(Pe+1) = Count(Pe+1)+1   ! Increment counter of segments
    1699             :           ENDIF
    1700    14266368 :           OldPe    = Pe                   ! Update old PE
    1701    14266368 :           OldLocal = Local                ! Update old local index
    1702    14266368 :           Len = Len+1                     ! Good -- segment is getting longer
    1703    29127168 :           Inc = Inc+1                     ! Increment local index
    1704             :         ENDDO
    1705             :       ENDDO
    1706             : !
    1707             : ! Clean up
    1708             : !
    1709        6144 :       BlockSizesA(ja) = Len
    1710             :       CPP_ASSERT_F90( JA .LE. GlobalSizeA )
    1711             : !
    1712             : ! Now create the pattern from the displacements and block sizes
    1713             : !
    1714        6144 :       Inc = 0
    1715     4724736 :       DO ipe = 1, NpesA
    1716             : !
    1717             : ! Find the segments which are relevant for the sender ipe
    1718             : ! Make compact arrays BlockSizes and Displacements 
    1719             : !
    1720   461248512 :         DO j = 1, ja
    1721   461242368 :           IF ( PeA(j) == ipe-1 ) THEN
    1722      594432 :             Inc = Inc + 1
    1723      594432 :             BlockSizesB(Inc) = BlockSizesA(j)
    1724      594432 :             DisplacementsB(Inc) = DisplacementsA(j)
    1725      594432 :             GlobalB(Inc)      = GlobalA(j)
    1726             :           ENDIF
    1727             :         ENDDO
    1728             :       ENDDO
    1729             : 
    1730             :      CPP_ASSERT_F90(Inc .LE. TotalPtsB)
    1731             : 
    1732             : !
    1733             : ! Create the receiver communication pattern
    1734             : !
    1735        6144 :       Off = 0
    1736     4724736 :       DO ipe = 1, NpesA
    1737     4718592 :         Num = Count(ipe)
    1738             : #if defined( DEBUG_PARPATTERNGHOSTTODECOMP )
    1739             :         write(iulog,*) "Receiver Iam", Iam, "Ipe", Ipe-1, "Num", Num, &
    1740             :                  "Displacements", DisplacementsB(Off+1:Off+Num), &
    1741             :                  "BlockSizes", BlockSizesB(Off+1:Off+Num)
    1742             : #endif
    1743             : 
    1744     4718592 :           IF ( Num > 0 .and. method > 0 ) THEN
    1745           0 :             CALL MPI_TYPE_INDEXED( Num, BlockSizesB(Off+1),       &
    1746           0 :                DisplacementsB(Off+1), DataType, Ptr, Ierror )
    1747           0 :             CALL MPI_TYPE_COMMIT( Ptr, Ierror )
    1748           0 :             Pattern%RecvDesc(ipe)%type = Ptr
    1749             :           ELSE
    1750     4718592 :             Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
    1751             :           ENDIF
    1752             : 
    1753     9510912 :           ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(Num) )
    1754     4792320 :           ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(Num) )
    1755     5313024 :           DO i=1, Num
    1756      594432 :             Pattern%RecvDesc(ipe)%Displacements(i) = DisplacementsB(i+Off)
    1757     5313024 :             Pattern%RecvDesc(ipe)%BlockSizes(i)    = BlockSizesB(i+Off)
    1758             :           ENDDO
    1759           0 :           Pattern%RecvDesc(ipe)%Nparcels  = &
    1760     4718592 :             size (Pattern%RecvDesc(ipe)%Displacements)
    1761           0 :           Pattern%RecvDesc(ipe)%Tot_Size = &
    1762     5313024 :             sum  (Pattern%RecvDesc(ipe)%Blocksizes)
    1763     4718592 :           Max_Nparcels = max (Max_Nparcels, Pattern%RecvDesc(ipe)%Nparcels)
    1764             : 
    1765     4724736 :         Off = Off + Num
    1766             :       ENDDO
    1767             : 
    1768             :     endif !  (iam .lt. NpesB)
    1769             : 
    1770             : !
    1771             : ! Now communicate what the receiver is expecting to the sender
    1772             : !
    1773             :       CALL ParExchangeVectorInt( InComm, Count, GlobalB,                 &
    1774        6144 :                                  CountOut, GlobalA  )
    1775             :       CALL ParExchangeVectorInt( InComm, Count, BlockSizesB,            &
    1776        6144 :                                  CountOut, BlockSizesA )
    1777             : 
    1778        6144 :     if (iam .lt. NpesA) then
    1779             : 
    1780             : !
    1781             : ! Sender A: BlockSizes and Displacements can now be stored
    1782             : !
    1783        6144 :       Off = 0
    1784     4724736 :       DO ipe=1, NpesB
    1785     4718592 :         Num = CountOut(ipe)
    1786     5313024 :         DO i=1, Num
    1787      594432 :           CALL DecompGlobalToLocal( GA%Local, GlobalA(i+Off), Local, Pe )
    1788     5313024 :           DisplacementsA(i+Off) = Local-1    ! zero-based displacement
    1789             :         ENDDO
    1790             : #if defined( DEBUG_PARPATTERNGHOSTTODECOMP )
    1791             :         write(iulog,*) "Sender Iam", Iam, "Ipe", Ipe-1, "Num", Num,  &
    1792             :                  "Displacements", DisplacementsA(Off+1:Off+Num), &
    1793             :                  "BlockSizes", BlockSizesA(Off+1:Off+Num)
    1794             : #endif
    1795             : 
    1796     4718592 :           IF ( Num > 0 .and. method > 0 ) THEN
    1797           0 :             CALL MPI_TYPE_INDEXED( Num, BlockSizesA(Off+1),        &
    1798           0 :                     DisplacementsA(Off+1), DataType, Ptr, Ierror )
    1799           0 :             CALL MPI_TYPE_COMMIT( Ptr, Ierror )
    1800           0 :             Pattern%SendDesc(ipe)%type = Ptr
    1801             :           ELSE
    1802     4718592 :             Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
    1803             :           ENDIF
    1804             : 
    1805     9510912 :           ALLOCATE( Pattern%SendDesc(ipe)%Displacements(Num) )
    1806     4792320 :           ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(Num) )
    1807     5313024 :           DO i=1, Num
    1808      594432 :             Pattern%SendDesc(ipe)%Displacements(i) = DisplacementsA(i+Off)
    1809     5313024 :             Pattern%SendDesc(ipe)%BlockSizes(i)    = BlockSizesA(i+Off)
    1810             :           ENDDO
    1811           0 :           Pattern%SendDesc(ipe)%Nparcels  = &
    1812     4718592 :             size (Pattern%SendDesc(ipe)%Displacements)
    1813           0 :           Pattern%SendDesc(ipe)%Tot_Size = &
    1814     5313024 :             sum  (Pattern%SendDesc(ipe)%Blocksizes)
    1815     4718592 :           Max_Nparcels = max (Max_Nparcels, Pattern%SendDesc(ipe)%Nparcels)
    1816             : 
    1817     4724736 :         Off = Off + Num
    1818             :       ENDDO
    1819             : 
    1820             :     endif !  (iam .lt. NpesA)
    1821             : 
    1822        6144 :       CALL get_partneroffset( InComm, Pattern%SendDesc, Pattern%RecvDesc )
    1823             :       
    1824        6144 :       DEALLOCATE( CountOut )
    1825        6144 :       DEALLOCATE( Count )
    1826             : 
    1827        6144 :       DEALLOCATE( PeA )
    1828        6144 :       DEALLOCATE( GlobalA )
    1829        6144 :       DEALLOCATE( BlockSizesA )
    1830        6144 :       DEALLOCATE( DisplacementsA )
    1831             : 
    1832        6144 :       DEALLOCATE( GlobalB )
    1833        6144 :       DEALLOCATE( BlockSizesB )
    1834        6144 :       DEALLOCATE( DisplacementsB )
    1835             : 
    1836             :       CPP_LEAVE_PROCEDURE( "PARPATTERNGHOSTTODECOMP" )
    1837        6144 :       RETURN
    1838             : !EOC
    1839        6144 :       END SUBROUTINE ParPatternGhostToDecomp
    1840             : !-----------------------------------------------------------------------
    1841             : 
    1842             : !-----------------------------------------------------------------------
    1843             : !BOP
    1844             : ! !IROUTINE:   ParPatternGhostToGhost --- Create pattern between ghosted decompositions
    1845             : !
    1846             : ! !INTERFACE:
    1847           0 :       SUBROUTINE ParPatternGhostToGhost( InComm, GA, GB, Pattern, mod_method, T )
    1848             : !
    1849             : ! !USES:
    1850             :       USE decompmodule, ONLY : DecompGlobalToLocal
    1851             :       USE ghostmodule, ONLY : GhostType, GhostInfo
    1852             :       USE mod_comm, ONLY : get_partneroffset
    1853             :       IMPLICIT NONE
    1854             : 
    1855             : ! !INPUT PARAMETERS:
    1856             :       INTEGER,  INTENT( IN )               :: InComm  ! MPI communicator
    1857             :       TYPE(GhostType),   INTENT( IN )      :: GA      ! Source Ghost Decomp
    1858             :       TYPE(GhostType),   INTENT( IN )      :: GB      ! Target Ghost Decomp
    1859             :       INTEGER,  INTENT( IN ), OPTIONAL     :: mod_method ! contiguous or derived type
    1860             :       INTEGER, INTENT( IN ),  OPTIONAL     :: T       ! MPI datatype (default CPP_MPI_REAL8)
    1861             : ! !OUTPUT PARAMETERS:
    1862             :       TYPE(ParPatternType), INTENT( OUT )  :: Pattern ! Comm Pattern
    1863             : !
    1864             : ! !DESCRIPTION:
    1865             : !     This routine constructs a communication pattern for a 
    1866             : !     transformation from one ghosted decomposition to another
    1867             : !     ghosted decomposition, i.e., a so-called "transpose". The
    1868             : !     resulting communication pattern can be used in ParBegin/EndTransfer
    1869             : !     with the decomposed arrays as inputs.  
    1870             : !
    1871             : ! !SYSTEM ROUTINES:
    1872             : !    MPI_COMM_SIZE,  MPI_COMM_RANK, MPI_COMM_DUP
    1873             : !    MPI_TYPE_INDEXED, MPI_TYPE_COMMIT (depending on method)
    1874             : !
    1875             : ! !REVISION HISTORY:
    1876             : !   02.01.10   Sawyer     Creation from DecompToDecomp
    1877             : !   02.07.16   Sawyer     Added data type T
    1878             : !   03.11.11   Mirin      Added optional argument mod_method
    1879             : !   07.03.11   Mirin      Generalized to different sized decompositions
    1880             : !   07.09.04   Dennis     Reduced amount of temporary memory usage
    1881             : !
    1882             : !EOP
    1883             : !-----------------------------------------------------------------------
    1884             : !BOC
    1885             : ! !LOCAL VARIABLES:
    1886             :       INTEGER I, J, Tag, Local, Pe, Len, JA, Ipe, Num, Inc, Off
    1887             :       INTEGER NpesA, GlobalSizeA, LocalSizeA, BorderSizeA
    1888             :       INTEGER NpesB, GlobalSizeB, LocalSizeB, BorderSizeB
    1889             :       INTEGER GroupSize, Iam, Ierror, OldPe, OldLocal 
    1890             :       INTEGER Ptr                                ! Pointer type
    1891             :       INTEGER  DataType
    1892             :       INTEGER              :: method
    1893             :       INTEGER              :: nCount, maxCount, ierr ! NOTE(review): unused in this routine
    1894             : #ifdef _SMEMORY
    1895             :       TYPE (ParInfoType) :: Info
    1896             : #endif
    1897             : 
    1898           0 :       INTEGER, ALLOCATABLE :: Count(:)           ! # segments for each recv PE
    1899           0 :       INTEGER, ALLOCATABLE :: CountOut(:)        ! # segments for each send PE
    1900             : 
    1901           0 :       INTEGER, ALLOCATABLE :: DisplacementsA(:)  ! Generic displacements
    1902           0 :       INTEGER, ALLOCATABLE :: BlockSizesA(:)     ! Generic block sizes
    1903           0 :       INTEGER, ALLOCATABLE :: GlobalA(:)         ! Generic Local indices
    1904           0 :       INTEGER, ALLOCATABLE :: PeA(:)             ! Processor element numbers
    1905             : 
    1906           0 :       INTEGER, ALLOCATABLE :: DisplacementsB(:)  ! Displacements for B
    1907           0 :       INTEGER, ALLOCATABLE :: BlockSizesB(:)     ! Block sizes for B
    1908           0 :       INTEGER, ALLOCATABLE :: GlobalB(:)         ! Global indices for B
    1909             : 
    1910             :       CPP_ENTER_PROCEDURE( "PARPATTERNGHOSTTOGHOST" )
    1911             : 
    1912           0 :       IF (present(T)) THEN
    1913           0 :         DataType = T
    1914             :       ELSE
    1915           0 :         DataType = CPP_MPI_REAL8
    1916             :       ENDIF
    1917             : 
    1918           0 :       IF (present(mod_method)) THEN
    1919           0 :         method = mod_method
    1920             :       ELSE
    1921             :         method = 0     ! Default method - see mod_comm for description
    1922             :       ENDIF
    1923             : 
    1924             : ! Assume this routine is called by processes [ 0,max(NpesA,NpesB) )
    1925             : 
    1926           0 :       CALL GhostInfo( GA, NpesA, GlobalSizeA, LocalSizeA, BorderSizeA )
    1927           0 :       CALL GhostInfo( GB, NpesB, GlobalSizeB, LocalSizeB, BorderSizeB )
    1928             : 
    1929           0 :       CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
    1930           0 :       CALL MPI_COMM_RANK( InComm, Iam, Ierror )
    1931           0 :       CALL MPI_COMM_DUP( InComm, Pattern%Comm, Ierror )
    1932             : 
    1933             : #ifdef _SMEMORY
    1934             : ! Calculate info about the pattern; use the (smaller) segment count as
    1935             : ! the worst-case size for the scratch arrays instead of the global sizes
    1936           0 :       call ParCalcInfo(InComm,GA,GB, Info)
    1937           0 :       GlobalSizeA=Info%maxNumSeg
    1938           0 :       GlobalSizeB=Info%maxNumSeg
    1939             : #endif
    1940             : 
    1941           0 :       Pattern%Size = GroupSize
    1942           0 :       Pattern%Iam  = Iam
    1943             : !
    1944             : ! Allocate the number of entries and list head arrays
    1945             : !
    1946             : 
    1947             : !
    1948             : ! Allocate the patterns; ranks outside the source (resp. target)
    1949             : ! decomposition get placeholder descriptors marked with -1 / NULL type
    1950             : !
    1951           0 :       ALLOCATE( Pattern%SendDesc( NpesB ) )
    1952           0 :       Pattern%SendDesc(:)%method = method
    1953           0 :       if (iam .ge. NpesA) then
    1954           0 :          do ipe = 1, NpesB
    1955           0 :             ALLOCATE( Pattern%SendDesc(ipe)%Displacements(1) )
    1956           0 :             ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(1) )
    1957           0 :             Pattern%SendDesc(ipe)%Tot_Size = -1
    1958           0 :             Pattern%SendDesc(ipe)%Nparcels = -1
    1959           0 :             Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
    1960           0 :             Pattern%SendDesc(ipe)%Displacements(1) = -1
    1961           0 :             Pattern%SendDesc(ipe)%Blocksizes(1) = -1
    1962             :          enddo
    1963             :       endif
    1964             : 
    1965           0 :       ALLOCATE( Pattern%RecvDesc( NpesA ) )
    1966           0 :       Pattern%RecvDesc(:)%method = method
    1967           0 :       if (iam .ge. NpesB) then
    1968           0 :          do ipe = 1, NpesA
    1969           0 :             ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(1) )
    1970           0 :             ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(1) )
    1971           0 :             Pattern%RecvDesc(ipe)%Tot_Size = -1
    1972           0 :             Pattern%RecvDesc(ipe)%Nparcels = -1
    1973           0 :             Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
    1974           0 :             Pattern%RecvDesc(ipe)%Displacements(1) = -1
    1975           0 :             Pattern%RecvDesc(ipe)%Blocksizes(1) = -1
    1976             :          enddo
    1977             :       endif
    1978             : 
    1979             : !
    1980             : ! Local allocations
    1981             : !
    1982           0 :       ALLOCATE( DisplacementsA( GlobalSizeA ) )   ! Allocate for worst case
    1983           0 :       ALLOCATE( BlockSizesA( GlobalSizeA ) )      ! Allocate for worst case
    1984           0 :       ALLOCATE( GlobalA( GlobalSizeA ) )          ! Allocate for worst case
    1985           0 :       ALLOCATE( PeA( GlobalSizeA ) )              ! Allocate for worst case
    1986             : 
    1987           0 :       ALLOCATE( DisplacementsB( GlobalSizeB ) )   ! Allocate for worst case
    1988           0 :       ALLOCATE( BlockSizesB( GlobalSizeB ) )      ! Allocate for worst case
    1989           0 :       ALLOCATE( GlobalB( GlobalSizeB ) )          ! Allocate for worst case
    1990             : 
    1991           0 :       ALLOCATE( Count( GroupSize ) )
    1992           0 :       ALLOCATE( CountOut( GroupSize ) )
    1993             : 
    1994           0 :       JA        = 0
    1995           0 :       Count     = 0
    1996           0 :       Len      = 0
    1997           0 :       GlobalB     = 0   !  (needed for parexchangevector later)
    1998           0 :       BlocksizesB = 0   !  (needed for parexchangevector later)
    1999             : 
    2000           0 :       Num    = 0
    2001           0 :       Inc    = 0
    2002             : 
    2003           0 :     if (iam .lt. NpesB) then
    2004             : 
    2005             : !
    2006             : ! Parse through all the tags in the local segment
    2007           0 :       DO J = 1, SIZE( GB%Local%Head(iam+1)%StartTags )
    2008           0 :         OldPe     = -1         ! Set PE undefined
    2009           0 :         OldLocal  = 0          ! Set index value undefined
    2010           0 :         DO Tag=GB%Local%Head(iam+1)%StartTags(J), GB%Local%Head(iam+1)%EndTags(J)
    2011           0 :           IF ( Tag > 0 ) THEN       ! Active point
    2012             : !
    2013             : ! Determine the index and PE of this entry on A. This might be inlined later
    2014             : !
    2015           0 :             CALL DecompGlobalToLocal( GA%Decomp, Tag, Local, Pe )
    2016             : !
    2017             : ! Start a new segment whenever the source PE changes or the local index
    2018             : ! on A is no longer contiguous with the previous entry
    2019             : !
    2020           0 :             IF ( Pe /= OldPe  .OR. Local /= OldLocal+1 ) THEN
    2021           0 :               IF ( ja > 0 ) THEN
    2022           0 :                 BlockSizesA(ja) = Len
    2023           0 :                 Len = 0
    2024             :               ENDIF
    2025           0 :               ja = ja+1                     ! increment the segment index
    2026           0 :               DisplacementsA(ja) = Inc      ! Zero-based offset of local segment
    2027           0 :               GlobalA(ja) = Tag             ! The global tag of the desired datum
    2028           0 :               PeA(ja) = Pe                  ! Note the ID of the sender
    2029           0 :               Count(Pe+1) = Count(Pe+1)+1   ! Increment counter of segments
    2030             :             ENDIF
    2031           0 :             OldPe   = Pe                    ! Update old PE
    2032           0 :             OldLocal = Local                ! Update old local index
    2033           0 :             Len = Len+1                     ! Good -- segment is getting longer
    2034             :           ENDIF
    2035           0 :           Inc = Inc+1                       ! Increment local index
    2036             :         ENDDO
    2037             :       ENDDO
    2038             : !
    2039             : ! Clean up: record the length of the last open segment (assumes ja >= 1)
    2040             : !
    2041           0 :       BlockSizesA(ja) = Len
    2042             : 
    2043             :       CPP_ASSERT_F90( JA .LE. GlobalSizeA )
    2044             : 
    2045             : !
    2046             : ! Now create the pattern from the displacements and block sizes
    2047             : !
    2048           0 :       Inc = 0
    2049           0 :       DO ipe = 1, NpesA
    2050             : !
    2051             : ! Find the segments which are relevant for the sender ipe
    2052             : ! Make compact arrays BlockSizes and Displacements 
    2053             : !
    2054           0 :         DO j = 1, ja
    2055           0 :           IF ( PeA(j) == ipe-1 ) THEN
    2056           0 :             Inc = Inc + 1
    2057           0 :             BlockSizesB(Inc) = BlockSizesA(j)
    2058           0 :             DisplacementsB(Inc) = DisplacementsA(j)
    2059           0 :             GlobalB(Inc)      = GlobalA(j)
    2060             :           ENDIF
    2061             :         ENDDO
    2062             :       ENDDO
    2063             :       CPP_ASSERT_F90( Inc .LE. GlobalSizeB )
    2064             : 
    2065             : !
    2066             : ! Create the receiver communication pattern
    2067             : !
    2068           0 :       Off = 0
    2069           0 :       DO ipe = 1, NpesA
    2070           0 :         Num = Count(ipe)
    2071             : #if defined(DEBUG_PARPATTERNGHOSTTOGHOST)
    2072             :         write(iulog,*) "Receiver Iam", Iam, "Ipe", Ipe-1, "Num", Num, &
    2073             :                  "Displacements", DisplacementsB(Off+1:Off+Num), &
    2074             :                  "BlockSizes", BlockSizesB(Off+1:Off+Num)
    2075             : #endif
    2076             : 
    2077           0 :           IF ( Num > 0 .and. method > 0 ) THEN
    2078           0 :             CALL MPI_TYPE_INDEXED( Num, BlockSizesB(Off+1),         &
    2079           0 :                  DisplacementsB(Off+1), DataType, Ptr, Ierror )
    2080           0 :             CALL MPI_TYPE_COMMIT( Ptr, Ierror )
    2081           0 :             Pattern%RecvDesc(ipe)%type = Ptr
    2082             :           ELSE
    2083           0 :             Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
    2084             :           ENDIF
    2085             : 
    2086           0 :           ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(Num) )
    2087           0 :           ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(Num) )
    2088           0 :           DO i=1, Num
    2089           0 :             Pattern%RecvDesc(ipe)%Displacements(i) = DisplacementsB(i+Off)
    2090           0 :             Pattern%RecvDesc(ipe)%BlockSizes(i)    = BlockSizesB(i+Off)
    2091             :           ENDDO
    2092           0 :           Pattern%RecvDesc(ipe)%Nparcels  = &
    2093           0 :             size (Pattern%RecvDesc(ipe)%Displacements)
    2094           0 :           Pattern%RecvDesc(ipe)%Tot_Size = &
    2095           0 :             sum  (Pattern%RecvDesc(ipe)%Blocksizes)
    2096           0 :           Max_Nparcels = max (Max_Nparcels, Pattern%RecvDesc(ipe)%Nparcels)
    2097             : 
    2098           0 :         Off = Off + Num
    2099             :       ENDDO
    2100             : 
    2101             :     endif !  (iam .lt. NpesB)
    2102             : 
    2103             : !
    2104             : ! Now communicate what the receiver is expecting to the sender
    2105             : !
    2106             :       CALL ParExchangeVectorInt( InComm, Count, GlobalB,                &
    2107           0 :                                  CountOut, GlobalA  )
    2108             :       CALL ParExchangeVectorInt( InComm, Count, BlockSizesB,            &
    2109           0 :                                  CountOut, BlockSizesA )
    2110             : 
    2111           0 :     if (iam .lt. NpesA) then
    2112             : 
    2113             : !
    2114             : ! Sender A: BlockSizes and Displacements can now be stored
    2115             : !
    2116           0 :       Off = 0
    2117           0 :       DO ipe=1, NpesB
    2118           0 :         Num = CountOut(ipe)
    2119           0 :         DO i=1, Num
    2120           0 :           CALL DecompGlobalToLocal( GA%Local, GlobalA(i+Off), Local, Pe )
    2121           0 :           DisplacementsA(i+Off) = Local-1    ! zero-based displacement
    2122             :         ENDDO
    2123             : #if defined(DEBUG_PARPATTERNGHOSTTOGHOST)
    2124             :         write(iulog,*) "Sender Iam", Iam, "Ipe", Ipe-1, "Num", Num,  &
    2125             :                  "Displacements", DisplacementsA(Off+1:Off+Num), &
    2126             :                  "BlockSizes", BlockSizesA(Off+1:Off+Num)
    2127             : #endif
    2128             : 
    2129           0 :           IF ( Num > 0 .and. method > 0 ) THEN
    2130           0 :             CALL MPI_TYPE_INDEXED( Num, BlockSizesA(Off+1),        &
    2131           0 :                  DisplacementsA(Off+1), DataType, Ptr, Ierror )
    2132           0 :             CALL MPI_TYPE_COMMIT( Ptr, Ierror )
    2133           0 :             Pattern%SendDesc(ipe)%type = Ptr
    2134             :           ELSE
    2135           0 :             Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
    2136             :           ENDIF
    2137             : 
    2138           0 :           ALLOCATE( Pattern%SendDesc(ipe)%Displacements(Num) )
    2139           0 :           ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(Num) )
    2140           0 :           DO i=1, Num
    2141           0 :             Pattern%SendDesc(ipe)%Displacements(i) = DisplacementsA(i+Off)
    2142           0 :             Pattern%SendDesc(ipe)%BlockSizes(i)    = BlockSizesA(i+Off)
    2143             :           ENDDO
    2144           0 :           Pattern%SendDesc(ipe)%Nparcels  = &
    2145           0 :             size (Pattern%SendDesc(ipe)%Displacements)
    2146           0 :           Pattern%SendDesc(ipe)%Tot_Size = &
    2147           0 :             sum  (Pattern%SendDesc(ipe)%Blocksizes)
    2148           0 :           Max_Nparcels = max (Max_Nparcels, Pattern%SendDesc(ipe)%Nparcels)
    2149             : 
    2150           0 :         Off = Off + Num
    2151             :       ENDDO
    2152             : 
    2153             :     endif !  (iam .lt. NpesA)
    2154             : 
    2155           0 :       CALL get_partneroffset( InComm, Pattern%SendDesc, Pattern%RecvDesc )
    2156             : 
    2157             : 
    2158           0 :       DEALLOCATE( CountOut )
    2159           0 :       DEALLOCATE( Count )
    2160             : 
    2161           0 :       DEALLOCATE( PeA )
    2162           0 :       DEALLOCATE( GlobalA )
    2163           0 :       DEALLOCATE( BlockSizesA )
    2164           0 :       DEALLOCATE( DisplacementsA )
    2165             : 
    2166           0 :       DEALLOCATE( GlobalB )
    2167           0 :       DEALLOCATE( BlockSizesB )
    2168           0 :       DEALLOCATE( DisplacementsB )
    2169             : 
    2170             :       CPP_LEAVE_PROCEDURE( "PARPATTERNGHOSTTOGHOST" )
    2171           0 :       RETURN
    2172             : !EOC
    2173           0 :       END SUBROUTINE ParPatternGhostToGhost
    2171             : !-----------------------------------------------------------------------
    2172             : 
    2173             : !-----------------------------------------------------------------------
    2174             : !BOP
    2175             : ! !IROUTINE:   ParPatternFree --- Free the communication pattern
    2176             : !
    2177             : ! !INTERFACE:
    2178           0 :       SUBROUTINE ParPatternFree( InComm, Pattern )
    2179             : !
    2180             : ! !USES:
    2181             :       IMPLICIT NONE
    2182             : 
    2183             : ! !INPUT PARAMETERS:
    2184             :       INTEGER,  INTENT( IN )                 :: InComm  ! # of PEs
    2185             : ! !INPUT/OUTPUT PARAMETERS:
    2186             :       TYPE(ParPatternType), INTENT( INOUT )  :: Pattern ! Comm Pattern
    2187             : !
    2188             : ! !DESCRIPTION:
    2189             : !     This routine frees a communication pattern.  
    2190             : !
    2191             : ! !SYSTEM ROUTINES:
    2192             : !     MPI_TYPE_FREE
    2193             : !
    2194             : ! !BUGS:
    2195             : !     The MPI_TYPE_FREE statement does not seem to work with FFC
    2196             : !
    2197             : ! !REVISION HISTORY:
    2198             : !   01.02.10   Sawyer     Creation
    2199             : !
    2200             : !EOP
    2201             : !-----------------------------------------------------------------------
    2202             : !BOC
    2203             : ! !LOCAL VARIABLES:
    2204             :       INTEGER  ipe, GroupSize, Pointer, Ierror, method
    2205             : 
    2206             :       CPP_ENTER_PROCEDURE( "PARPATTERNFREE" )
    2207             : 
    2208           0 :       method = Pattern%RecvDesc(1)%method
    2209             : 
    2210             : !
    2211             : ! First request the needed ghost values from other processors.
    2212             : !
    2213             : ! Free all the MPI derived types
    2214             : !
    2215           0 :       DO ipe=1, Pattern%Size
    2216           0 :         Pointer = Pattern%SendDesc(ipe)%type
    2217           0 :         IF ( Pointer /= MPI_DATATYPE_NULL ) THEN
    2218           0 :           CALL MPI_TYPE_FREE( Pointer, Ierror )
    2219             :         ENDIF
    2220           0 :         Pointer = Pattern%RecvDesc(ipe)%type
    2221           0 :         IF ( Pointer /= MPI_DATATYPE_NULL ) THEN
    2222           0 :           CALL MPI_TYPE_FREE( Pointer, Ierror )
    2223             :         ENDIF
    2224             :       ENDDO
    2225             : 
    2226           0 :       DO ipe=1, size(Pattern%RecvDesc)
    2227           0 :         DEALLOCATE( Pattern%RecvDesc(ipe)%Displacements )
    2228           0 :         DEALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes )
    2229             :       ENDDO
    2230           0 :       DO ipe=1, size(Pattern%SendDesc)
    2231           0 :         DEALLOCATE( Pattern%SendDesc(ipe)%Displacements )
    2232           0 :         DEALLOCATE( Pattern%SendDesc(ipe)%BlockSizes )
    2233             :       ENDDO
    2234             : 
    2235           0 :       DEALLOCATE( Pattern%SendDesc )
    2236           0 :       DEALLOCATE( Pattern%RecvDesc )
    2237             : 
    2238             :       CPP_LEAVE_PROCEDURE( "PARPATTERNFREE" )
    2239           0 :       RETURN
    2240             : !EOC
    2241             :       END SUBROUTINE ParPatternFree
    2242             : !-----------------------------------------------------------------------
    2243             : 
    2244             : 
    2245             : !-----------------------------------------------------------------------
    2246             : !BOP
    2247             : ! !IROUTINE: ParScatterReal --- Scatter slice to all PEs
    2248             : !
    2249             : ! !INTERFACE:
    2250           0 :       SUBROUTINE ParScatterReal ( InComm, Root, Slice, Decomp, Local )
    2251             : 
    2252             : ! !USES:
    2253             :       USE decompmodule, ONLY:  DecompType, Lists
    2254             :       IMPLICIT NONE
    2255             : 
    2256             : ! !INPUT PARAMETERS:
    2257             :       INTEGER, INTENT( IN )          :: InComm       ! Communicator
    2258             :       INTEGER, INTENT( IN )          :: Root         ! Root PE
    2259             :       REAL(CPP_REAL8), INTENT( IN )  :: Slice(*)     ! Global Slice
    2260             :       TYPE(DecompType), INTENT( IN ) :: Decomp       ! Decomp information
    2261             : 
    2262             : ! !OUTPUT PARAMETERS:
    2263             :       REAL(CPP_REAL8), INTENT( OUT ) :: Local(*)     ! Local Slice
    2264             : 
    2265             : ! !DESCRIPTION:
    2266             : !     Given a decomposition of the domain, dole out a slice 
    2267             : !     (one-dimensional array) to all the constituent PEs as described
    2268             : !     by the decomposition Decomp.
    2269             : !
    2270             : !
    2271             : ! !SYSTEM ROUTINES:
    2272             : !     MPI_ISEND, MPI_RECV, MPI_COMM_RANK
    2273             : !
    2274             : ! !REVISION HISTORY:
    2275             : !   97.04.14   Sawyer     Creation
    2276             : !   97.04.16   Sawyer     Cleaned up for walk-through
    2277             : !   97.05.01   Sawyer     Use Decomp%Comm for all local info
    2278             : !   97.05.18   Sawyer     DecompType has moved to ParUtilitiesTypes
    2279             : !   97.05.29   Sawyer     Changed 2-D arrays to 1-D
    2280             : !   97.07.03   Sawyer     Reformulated documentation
    2281             : !   97.07.22   Sawyer     DecompType has moved to DecompModule
    2282             : !   97.12.01   Sawyer     Changed MPI_SSEND to MPI_ISEND
    2283             : !   97.12.05   Sawyer     Added InComm and Root as arguments
    2284             : !   97.12.05   Sawyer     Added logic to support intercommunicators
    2285             : !   98.01.24   Sawyer     Removed dependence on MPI derived types TESTED
    2286             : !   98.02.05   Sawyer     Removed the use of intercommunicators
    2287             : !   98.03.30   Sawyer     Stats dimension corrected: Gsize*MPI_STATUS_SIZE
    2288             : !   99.01.19   Sawyer     Dropped assumed-size arrays
    2289             : !   00.07.07   Sawyer     Removed "1D" references
    2290             : !   00.07.23   Sawyer     Implementation with shared memory arenas
    2291             : !
    2292             : !EOP
    2293             : !-----------------------------------------------------------------------
    2294             : !BOC
    2295             : ! !LOCAL VARIABLES:
    2296             : 
    2297             :       INTEGER Ierror, I, J, K, L, Iam, GroupSize
    2298             :       INTEGER Status( MPI_STATUS_SIZE )
    2299           0 :       Integer, allocatable :: Reqs(:), Stats(:)
    2300           0 :       REAL(CPP_REAL8), ALLOCATABLE    :: SendBuf(:)
    2301             : !
    2302             :       CPP_ENTER_PROCEDURE( "PARSCATTERREAL" )
    2303             : !
    2304           0 :       CALL MPI_COMM_RANK( InComm, Iam, Ierror )
    2305           0 :       CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
    2306             : 
    2307           0 :       allocate (Reqs(GroupSize))
    2308           0 :       allocate (Stats(GroupSize*MPI_STATUS_SIZE))
    2309             : 
    2310           0 :       IF ( Iam .EQ. Root ) THEN
    2311           0 :         ALLOCATE( SendBuf( SUM( Decomp%NumEntries ) ) )
    2312           0 :         L = 0
    2313           0 :         DO I = 1, GroupSize
    2314             : !
    2315             : ! Pick out the array sections to be sent.
    2316             : ! This is the inverse of the operation in ParGather
    2317             : !
    2318           0 :           DO J = 1, SIZE( Decomp%HEAD(I)%StartTags )
    2319           0 :             DO K = Decomp%HEAD(I)%StartTags(J),Decomp%HEAD(I)%EndTags(J)
    2320           0 :               L = L+1
    2321           0 :               SendBuf(L) = Slice(K)
    2322             :             ENDDO
    2323             :           ENDDO
    2324             : !
    2325             : ! This is a non-blocking send. SendBuf cannot be immediately deallocated
    2326             : !
    2327             : ! WARNING: F90-MPI inconsistency: make sure the indexing below always works
    2328             : !
    2329           0 :           CALL MPI_ISEND( SendBuf(L-Decomp%NumEntries(I)+1),             &
    2330             :                           Decomp%NumEntries(I), CPP_MPI_REAL8,           &
    2331           0 :                           I-1, 0, InComm, Reqs(I), Ierror )
    2332             : 
    2333             :         ENDDO
    2334             :       ENDIF
    2335             : 
    2336             : !
    2337             : ! All receive from the root.  
    2338             : !
    2339             : ! The local array may be larger than that specified in the decomposition
    2340             : !
    2341           0 :       CALL MPI_RECV( Local, Decomp%NumEntries(Iam+1),                    &
    2342             :                      CPP_MPI_REAL8,                                      &
    2343           0 :                      Root, 0, InComm, Status, Ierror )
    2344             : !
    2345             : ! Experience shows that we should wait for all the non-blocking
    2346             : ! PEs to check in, EVEN THOUGH THE MPI_RECV HAS COMPLETED !!
    2347             : !
    2348           0 :       IF ( Iam .EQ. Root ) THEN
    2349           0 :         CALL MPI_WAITALL( GroupSize, Reqs, Stats, Ierror )
    2350           0 :         DEALLOCATE( SendBuf )
    2351             :       ENDIF
    2352             : 
    2353             : !
    2354             : ! The following may be needed on some platforms to avoid an MPI bug.
    2355             : !
    2356           0 :       CALL MPI_BARRIER( InComm, Ierror )
    2357             : 
    2358           0 :       deallocate (Reqs)
    2359           0 :       deallocate (Stats)
    2360             : 
    2361             :       CPP_LEAVE_PROCEDURE( "PARSCATTERREAL" )
    2362           0 :       RETURN
    2363             : !EOC
    2364           0 :       END SUBROUTINE ParScatterReal
    2365             : !-----------------------------------------------------------------------
    2366             : 
    2367             : 
    2368             : !-----------------------------------------------------------------------
    2369             : !BOP
    2370             : ! !IROUTINE: ParScatterReal4 --- Scatter slice to all PEs
    2371             : !
    2372             : ! !INTERFACE:
    2373           0 :       SUBROUTINE ParScatterReal4 ( InComm, Root, Slice, Decomp, Local )
    2374             : 
    2375             : ! !USES:
    2376             :       USE decompmodule, ONLY:  DecompType, Lists
    2377             :       IMPLICIT NONE
    2378             : 
    2379             : ! !INPUT PARAMETERS:
    2380             :       INTEGER, INTENT( IN )          :: InComm       ! Communicator
    2381             :       INTEGER, INTENT( IN )          :: Root         ! Root PE
    2382             :       REAL(CPP_REAL4), INTENT( IN )  :: Slice(*)     ! Global Slice
    2383             :       TYPE(DecompType), INTENT( IN ) :: Decomp       ! Decomp information
    2384             : 
    2385             : ! !OUTPUT PARAMETERS:
    2386             :       REAL(CPP_REAL4), INTENT( OUT ) :: Local(*)     ! Local Slice
    2387             : 
    2388             : ! !DESCRIPTION:
    2389             : !     Given a decomposition of the domain, dole out a slice 
    2390             : !     (one-dimensional array) to all the constituent PEs as described
    2391             : !     by the decomposition Decomp.
    2392             : !
    2393             : !
    2394             : ! !SYSTEM ROUTINES:
    2395             : !     MPI_ISEND, MPI_RECV, MPI_COMM_RANK
    2396             : !
    2397             : ! !REVISION HISTORY:
    2398             : !   97.04.14   Sawyer     Creation
    2399             : !   97.04.16   Sawyer     Cleaned up for walk-through
    2400             : !   97.05.01   Sawyer     Use Decomp%Comm for all local info
    2401             : !   97.05.18   Sawyer     DecompType has moved to ParUtilitiesTypes
    2402             : !   97.05.29   Sawyer     Changed 2-D arrays to 1-D
    2403             : !   97.07.03   Sawyer     Reformulated documentation
    2404             : !   97.07.22   Sawyer     DecompType has moved to DecompModule
    2405             : !   97.12.01   Sawyer     Changed MPI_SSEND to MPI_ISEND
    2406             : !   97.12.05   Sawyer     Added InComm and Root as arguments
    2407             : !   97.12.05   Sawyer     Added logic to support intercommunicators
    2408             : !   98.01.24   Sawyer     Removed dependence on MPI derived types TESTED
    2409             : !   98.02.05   Sawyer     Removed the use of intercommunicators
    2410             : !   98.03.30   Sawyer     Stats dimension corrected: Gsize*MPI_STATUS_SIZE
    2411             : !   99.01.19   Sawyer     Dropped assumed-size arrays
    2412             : !   00.07.07   Sawyer     Removed "1D" references
    2413             : !   00.07.23   Sawyer     Implementation with shared memory arenas
    2414             : !
    2415             : !EOP
    2416             : !-----------------------------------------------------------------------
    2417             : !BOC
    2418             : ! !LOCAL VARIABLES:
    2419             : 
    2420             :       INTEGER Ierror, I, J, K, L, Iam, GroupSize
    2421             :       INTEGER Status( MPI_STATUS_SIZE )
    2422           0 :       Integer, allocatable :: Reqs(:), Stats(:)
    2423           0 :       REAL(CPP_REAL4), ALLOCATABLE    :: SendBuf(:)
    2424             : !
    2425             :       CPP_ENTER_PROCEDURE( "PARSCATTERREAL4" )
    2426             : !
    2427           0 :       CALL MPI_COMM_RANK( InComm, Iam, Ierror )
    2428           0 :       CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
    2429             : 
    2430           0 :       allocate (Reqs(GroupSize))
    2431           0 :       allocate (Stats(GroupSize*MPI_STATUS_SIZE))
    2432             : 
    2433           0 :       IF ( Iam .EQ. Root ) THEN
    2434           0 :         ALLOCATE( SendBuf( SUM( Decomp%NumEntries ) ) )
    2435           0 :         L = 0
    2436           0 :         DO I = 1, GroupSize
    2437             : !
    2438             : ! Pick out the array sections to be sent.
    2439             : ! This is the inverse of the operation in ParGather
    2440             : !
    2441           0 :           DO J = 1, SIZE( Decomp%HEAD(I)%StartTags )
    2442           0 :             DO K = Decomp%HEAD(I)%StartTags(J),Decomp%HEAD(I)%EndTags(J)
    2443           0 :               L = L+1
    2444           0 :               SendBuf(L) = Slice(K)
    2445             :             ENDDO
    2446             :           ENDDO
    2447             : !
    2448             : ! This is a non-blocking send. SendBuf cannot be immediately deallocated
    2449             : !
    2450             : ! WARNING: F90-MPI inconsistency: make sure the indexing below always works
    2451             : !
    2452           0 :           CALL MPI_ISEND( SendBuf(L-Decomp%NumEntries(I)+1),             &
    2453             :                           Decomp%NumEntries(I), CPP_MPI_REAL4,           &
    2454           0 :                           I-1, 0, InComm, Reqs(I), Ierror )
    2455             : 
    2456             :         ENDDO
    2457             :       ENDIF
    2458             : 
    2459             : !
    2460             : ! All receive from the root.  
    2461             : !
    2462             : ! The local array may be larger than that specified in the decomposition
    2463             : !
    2464           0 :       CALL MPI_RECV( Local, Decomp%NumEntries(Iam+1),                    &
    2465             :                      CPP_MPI_REAL4,                                      &
    2466           0 :                      Root, 0, InComm, Status, Ierror )
    2467             : !
    2468             : ! Experience shows that we should wait for all the non-blocking
    2469             : ! PEs to check in, EVEN THOUGH THE MPI_RECV HAS COMPLETED !!
    2470             : !
    2471           0 :       IF ( Iam .EQ. Root ) THEN
    2472           0 :         CALL MPI_WAITALL( GroupSize, Reqs, Stats, Ierror )
    2473           0 :         DEALLOCATE( SendBuf )
    2474             :       ENDIF
    2475             : 
    2476             : !
    2477             : ! The following may be needed on some platforms to avoid an MPI bug.
    2478             : !
    2479           0 :       CALL MPI_BARRIER( InComm, Ierror )
    2480             : 
    2481           0 :       deallocate (Reqs)
    2482           0 :       deallocate (Stats)
    2483             : 
    2484             :       CPP_LEAVE_PROCEDURE( "PARSCATTERREAL4" )
    2485           0 :       RETURN
    2486             : !EOC
    2487           0 :       END SUBROUTINE ParScatterReal4
    2488             : !-----------------------------------------------------------------------
    2489             : 
    2490             : 
    2491             : !-----------------------------------------------------------------------
    2492             : !BOP
    2493             : ! !IROUTINE: ParScatterInt --- Scatter slice to all PEs
    2494             : !
    2495             : ! !INTERFACE:
    2496           0 :       SUBROUTINE ParScatterInt ( InComm, Root, Slice, Decomp, Local )
    2497             : 
    2498             : ! !USES:
    2499             :       USE decompmodule, ONLY:  DecompType, Lists
    2500             :       IMPLICIT NONE
    2501             : 
    2502             : ! !INPUT PARAMETERS:
    2503             :       INTEGER, INTENT( IN )          :: InComm       ! Communicator
    2504             :       INTEGER, INTENT( IN )          :: Root         ! Root PE
    2505             :       INTEGER, INTENT( IN )          :: Slice(*)     ! Global Slice
    2506             :       TYPE(DecompType), INTENT( IN ) :: Decomp       ! Decomp information
    2507             : 
    2508             : ! !OUTPUT PARAMETERS:
    2509             :       INTEGER, INTENT( OUT )         :: Local(*)     ! Local Slice
    2510             : 
    2511             : ! !DESCRIPTION:
    2512             : !     Given a decomposition of the domain, dole out a slice 
    2513             : !     (one-dimensional array) to all the constituent PEs as described
    2514             : !     by the decomposition Decomp.
    2515             : !
    2516             : !
    2517             : ! !SYSTEM ROUTINES:
    2518             : !     MPI_ISEND, MPI_RECV, MPI_COMM_RANK
    2519             : !
    2520             : ! !REVISION HISTORY:
    2521             : !   97.04.14   Sawyer     Creation
    2522             : !   97.04.16   Sawyer     Cleaned up for walk-through
    2523             : !   97.05.01   Sawyer     Use Decomp%Comm for all local info
    2524             : !   97.05.18   Sawyer     DecompType has moved to ParUtilitiesTypes
    2525             : !   97.05.29   Sawyer     Changed 2-D arrays to 1-D
    2526             : !   97.07.03   Sawyer     Reformulated documentation
    2527             : !   97.07.22   Sawyer     DecompType has moved to DecompModule
    2528             : !   97.12.01   Sawyer     Changed MPI_SSEND to MPI_ISEND
    2529             : !   97.12.05   Sawyer     Added InComm and Root as arguments
    2530             : !   97.12.05   Sawyer     Added logic to support intercommunicators
    2531             : !   98.01.24   Sawyer     Removed dependence on MPI derived types TESTED
    2532             : !   98.02.05   Sawyer     Removed the use of intercommunicators
    2533             : !   98.03.30   Sawyer     Stats dimension corrected: Gsize*MPI_STATUS_SIZE
    2534             : !   99.01.19   Sawyer     Dropped assumed-size arrays
    2535             : !   00.07.07   Sawyer     Removed "1D" references
    2536             : !   00.07.23   Sawyer     Implementation with shared memory arenas
    2537             : !
    2538             : !EOP
    2539             : !-----------------------------------------------------------------------
    2540             : !BOC
    2541             : ! !LOCAL VARIABLES:
    2542             : 
    2543             :       INTEGER Ierror, I, J, K, L, Iam, GroupSize
    2544             :       INTEGER Status( MPI_STATUS_SIZE )
    2545           0 :       Integer, allocatable :: Reqs(:), Stats(:)
    2546           0 :       INTEGER, ALLOCATABLE    :: SendBuf(:)
    2547             : !
    2548             :       CPP_ENTER_PROCEDURE( "PARSCATTERINT" )
    2549             : !
    2550           0 :       CALL MPI_COMM_RANK( InComm, Iam, Ierror )
    2551           0 :       CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
    2552             : 
    2553           0 :       allocate (Reqs(GroupSize))
    2554           0 :       allocate (Stats(GroupSize*MPI_STATUS_SIZE))
    2555             : 
    2556           0 :       IF ( Iam .EQ. Root ) THEN
    2557           0 :         ALLOCATE( SendBuf( SUM( Decomp%NumEntries ) ) )
    2558           0 :         L = 0
    2559           0 :         DO I = 1, GroupSize
    2560             : !
    2561             : ! Pick out the array sections to be sent.
    2562             : ! This is the inverse of the operation in ParGather
    2563             : !
    2564           0 :           DO J = 1, SIZE( Decomp%HEAD(I)%StartTags )
    2565           0 :             DO K = Decomp%HEAD(I)%StartTags(J),Decomp%HEAD(I)%EndTags(J)
    2566           0 :               L = L+1
    2567           0 :               SendBuf(L) = Slice(K)
    2568             :             ENDDO
    2569             :           ENDDO
    2570             : !
    2571             : ! This is a non-blocking send. SendBuf cannot be immediately deallocated
    2572             : !
    2573             : ! WARNING: F90-MPI inconsistency: make sure the indexing below always works
    2574             : !
    2575           0 :           CALL MPI_ISEND( SendBuf(L-Decomp%NumEntries(I)+1),              &
    2576             :                           Decomp%NumEntries(I), CPP_MPI_INTEGER,          &
    2577           0 :                           I-1, 0, InComm, Reqs(I), Ierror )
    2578             : 
    2579             :         ENDDO
    2580             :       ENDIF
    2581             : 
    2582             : !
    2583             : ! All receive from the root.  
    2584             : !
    2585             : ! The local array may be larger than that specified in the decomposition
    2586             : !
    2587           0 :       CALL MPI_RECV( Local, Decomp%NumEntries(Iam+1),                     &
    2588             :                      CPP_MPI_INTEGER,                                     &
    2589           0 :                      Root, 0, InComm, Status, Ierror )
    2590             : !
    2591             : ! Experience shows that we should wait for all the non-blocking
    2592             : ! PEs to check in, EVEN THOUGH THE MPI_RECV HAS COMPLETED !!
    2593             : !
    2594           0 :       IF ( Iam .EQ. Root ) THEN
    2595           0 :         CALL MPI_WAITALL( GroupSize, Reqs, Stats, Ierror )
    2596           0 :         DEALLOCATE( SendBuf )
    2597             :       ENDIF
    2598             : 
    2599             : !
    2600             : ! The following may be needed on some platforms to avoid an MPI bug.
    2601             : !
    2602           0 :       CALL MPI_BARRIER( InComm, Ierror )
    2603             : 
    2604           0 :       deallocate (Reqs)
    2605           0 :       deallocate (Stats)
    2606             : 
    2607             :       CPP_LEAVE_PROCEDURE( "PARSCATTERINT" )
    2608           0 :       RETURN
    2609             : !EOC
    2610           0 :       END SUBROUTINE ParScatterInt
    2611             : !-----------------------------------------------------------------------
    2612             : 
    2613             : 
    2614             : !-----------------------------------------------------------------------
    2615             : !BOP
    2616             : ! !IROUTINE: ParGatherReal --- Gather Slice from all PEs
    2617             : !
    2618             : ! !INTERFACE:  
    2619           0 :       SUBROUTINE ParGatherReal ( InComm, Root, Local, Decomp, Slice )
    2620             : 
    2621             : ! !USES:
    2622             :       USE decompmodule, ONLY:  DecompType, Lists
    2623             :       IMPLICIT NONE
    2624             : 
    2625             : ! !INPUT PARAMETERS:
    2626             :       INTEGER, INTENT( IN )          :: InComm       ! Communicator
    2627             :       INTEGER, INTENT( IN )          :: Root         ! Root PE
    2628             :       REAL(CPP_REAL8), INTENT( IN )  :: Local(*)     ! Local Slice
    2629             :       TYPE(DecompType), INTENT( IN ) :: Decomp       ! Decomp information
    2630             : 
    2631             : ! !OUTPUT PARAMETERS:
    2632             :       REAL(CPP_REAL8), INTENT( OUT ) :: Slice(*)     ! Global Slice
    2633             : 
    2634             : ! !DESCRIPTION:
    2635             : !     Given a decomposition of the domain and a local portion of the
    2636             : !     total slice on each PE, gather together the portions into a
    2637             : !     global slice on the root PE
    2638             : !
    2639             : ! !SYSTEM ROUTINES:
    2640             : !     MPI_ISEND, MPI_RECV, MPI_COMM_RANK
    2641             : !
    2642             : ! !REVISION HISTORY:
    2643             : !   97.04.14   Sawyer     Creation
    2644             : !   97.04.16   Sawyer     Cleaned up for walk-through
    2645             : !   97.05.01   Sawyer     Use Decomp%Comm for all local info
    2646             : !   97.05.18   Sawyer     DecompType has moved to ParUtilitiesTypes
    2647             : !   97.05.29   Sawyer     Changed 2-D arrays to 1-D
    2648             : !   97.07.03   Sawyer     Reformulated documentation
    2649             : !   97.07.22   Sawyer     DecompType has moved to DecompModule
    2650             : !   97.12.01   Sawyer     Changed MPI_SSEND to MPI_ISEND
    2651             : !   97.12.05   Sawyer     Added InComm and Root as arguments
    2652             : !   97.12.05   Sawyer     Added logic to support intercommunicators
    2653             : !   98.01.24   Sawyer     Removed dependence on MPI derived types TESTED
    2654             : !   98.01.29   Sawyer     Corrected assertions
    2655             : !   98.02.05   Sawyer     Removed the use of intercommunicators
    2656             : !   98.03.31   Sawyer     Stat dimension corrected: MPI_STATUS_SIZE
    2657             : !   98.04.22   Sawyer     Local no longer assumed shape: Local(*)
    2658             : !   99.01.19   Sawyer     Dropped assumed-size arrays
    2659             : !   00.07.07   Sawyer     Removed "1D" references
    2660             : !   00.07.23   Sawyer     Implementation with shared memory arenas
    2661             : !
    2662             : !EOP
    2663             : !-----------------------------------------------------------------------
    2664             : !BOC
    2665             : ! !LOCAL VARIABLES:
    2666             :       INTEGER Ierror, I, J, K, L, Iam, GroupSize, Req
    2667             :       INTEGER Status( MPI_STATUS_SIZE ), Stat( MPI_STATUS_SIZE )
    2668           0 :       REAL(CPP_REAL8), ALLOCATABLE    :: RecvBuf(:)
    2669             : !
    2670             :       CPP_ENTER_PROCEDURE( "PARGATHERREAL" )
    2671             : !
    2672           0 :       CALL MPI_COMM_RANK( InComm, Iam, Ierror )
    2673           0 :       CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
    2674             : !
    2675             : ! All PEs send their contribution to the root
    2676             : !
    2677           0 :       CALL MPI_ISEND( Local, Decomp%NumEntries(Iam+1),                   &
    2678             :                       CPP_MPI_REAL8,                                     &
    2679           0 :                       Root, Iam+3001, InComm, Req, Ierror )
    2680             : 
    2681           0 :       IF ( Iam .EQ. Root ) THEN
    2682           0 :         ALLOCATE( RecvBuf( SUM( Decomp%NumEntries ) ) )
    2683             : !
    2684             : ! On the Root PE receive from every other PE
    2685             : !
    2686           0 :         L = 0
    2687           0 :         DO I = 1, GroupSize
    2688             : !
    2689             : ! This is a blocking, synchronous recv.  All the
    2690             : ! sends should have been posted so it should not deadlock
    2691             : !
    2692             : ! WARNING: F90-MPI inconsistency: make sure the indexing below always works
    2693             : !
    2694             :           CPP_ASSERT_F90( L .LT. SIZE( RecvBuf ) )
    2695           0 :           CALL MPI_RECV( RecvBuf(L+1), Decomp%NumEntries(I),             &
    2696             :                          CPP_MPI_REAL8, I-1, I+3000, InComm,             &
    2697           0 :                          Status, Ierror )
    2698             : !
    2699             : ! This is the simple reverse mapping of that in ParScatter
    2700             : !
    2701           0 :           DO J = 1, SIZE( Decomp%HEAD(I)%StartTags )
    2702           0 :             DO K = Decomp%HEAD(I)%StartTags(J),Decomp%HEAD(I)%EndTags(J)
    2703           0 :               L = L + 1
    2704           0 :               Slice(K) = RecvBuf(L)
    2705             : #if defined(DEBUG_PARGATHERREAL)
    2706             :                 PRINT *, " Entry ", L, RecvBuf(L), K, SIZE(Slice)
    2707             : #endif
    2708             :             ENDDO
    2709             :           ENDDO
    2710             :         ENDDO
    2711           0 :         DEALLOCATE( RecvBuf )
    2712             :       ENDIF
    2713           0 :       CALL MPI_WAIT( Req, Stat, Ierror )
    2714             : !
    2715             : ! The following may be needed on some platforms to avoid an MPI bug.
    2716             : !
    2717           0 :       CALL MPI_BARRIER( InComm, Ierror )
    2718             : 
    2719             :       CPP_LEAVE_PROCEDURE( "PARGATHERREAL" )
    2720           0 :       RETURN
    2721             : !EOC
    2722           0 :       END SUBROUTINE ParGatherReal
    2723             : !-----------------------------------------------------------------------
    2724             : 
    2725             : 
    2726             : !-----------------------------------------------------------------------
    2727             : !BOP
    2728             : ! !IROUTINE: ParGatherReal4 --- Gather Slice from all PEs
    2729             : !
    2730             : ! !INTERFACE:  
      SUBROUTINE ParGatherReal4 ( InComm, Root, Local, Decomp, Slice )

! !USES:
      USE decompmodule, ONLY:  DecompType, Lists
      IMPLICIT NONE

! !INPUT PARAMETERS:
      INTEGER, INTENT( IN )          :: InComm       ! Communicator
      INTEGER, INTENT( IN )          :: Root         ! Root PE
      REAL(CPP_REAL4), INTENT( IN )  :: Local(*)     ! Local Slice
      TYPE(DecompType), INTENT( IN ) :: Decomp       ! Decomp information

! !OUTPUT PARAMETERS:
      REAL(CPP_REAL4), INTENT( OUT ) :: Slice(*)     ! Global Slice

! !DESCRIPTION:
!     Given a decomposition of the domain and a local portion of the
!     total slice on each PE, gather together the portions into a
!     global slice on the root PE.
!
!     Every rank -- including the root itself -- posts one
!     non-blocking send of its Decomp%NumEntries(rank+1) values.
!     The root then receives the parcels in rank order into a
!     contiguous buffer and scatters them into Slice according to
!     the StartTags/EndTags index ranges stored in Decomp%HEAD.
!     On non-root PEs Slice is not written.
!
! !SYSTEM ROUTINES:
!     MPI_ISEND, MPI_RECV, MPI_COMM_RANK
!
! !REVISION HISTORY:
!   97.04.14   Sawyer     Creation
!   97.04.16   Sawyer     Cleaned up for walk-through
!   97.05.01   Sawyer     Use Decomp%Comm for all local info
!   97.05.18   Sawyer     DecompType has moved to ParUtilitiesTypes
!   97.05.29   Sawyer     Changed 2-D arrays to 1-D
!   97.07.03   Sawyer     Reformulated documentation
!   97.07.22   Sawyer     DecompType has moved to DecompModule
!   97.12.01   Sawyer     Changed MPI_SSEND to MPI_ISEND
!   97.12.05   Sawyer     Added InComm and Root as arguments
!   97.12.05   Sawyer     Added logic to support intercommunicators
!   98.01.24   Sawyer     Removed dependence on MPI derived types TESTED
!   98.01.29   Sawyer     Corrected assertions
!   98.02.05   Sawyer     Removed the use of intercommunicators
!   98.03.31   Sawyer     Stat dimension corrected: MPI_STATUS_SIZE
!   98.04.22   Sawyer     Local no longer assumed shape: Local(*)
!   99.01.19   Sawyer     Dropped assumed-size arrays
!   00.07.07   Sawyer     Removed "1D" references
!   00.07.23   Sawyer     Implementation with shared memory arenas
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
      INTEGER Ierror, I, J, K, L, Iam, GroupSize, Req
      INTEGER Status( MPI_STATUS_SIZE ), Stat( MPI_STATUS_SIZE )
      REAL(CPP_REAL4), ALLOCATABLE    :: RecvBuf(:)
!
      CPP_ENTER_PROCEDURE( "PARGATHERREAL4" )
!
      CALL MPI_COMM_RANK( InComm, Iam, Ierror )
      CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
!
! All PEs send their contribution to the root.  The root also sends
! to itself; the matching receive is posted in the loop below.
! Tag Iam+3001 matches the I+3000 used at the receive end, since
! rank Iam is received at loop index I = Iam+1.
!
      CALL MPI_ISEND( Local, Decomp%NumEntries(Iam+1),                   &
                      CPP_MPI_REAL4,                                     &
                      Root, Iam+3001, InComm, Req, Ierror )

      IF ( Iam .EQ. Root ) THEN
        ALLOCATE( RecvBuf( SUM( Decomp%NumEntries ) ) )
!
! On the Root PE receive from every other PE
!
! L is a running offset into RecvBuf spanning all ranks' parcels;
! it is never reset inside the loop over I.
!
        L = 0
        DO I = 1, GroupSize
!
! This is a blocking, synchronous recv.  All the
! sends should have been posted so it should not deadlock
!
! WARNING: F90-MPI inconsistency: make sure the indexing below always works
!
          CPP_ASSERT_F90( L .LT. SIZE( RecvBuf ) )
          CALL MPI_RECV( RecvBuf(L+1), Decomp%NumEntries(I),             &
                         CPP_MPI_REAL4, I-1, I+3000, InComm,             &
                         Status, Ierror )
!
! This is the simple reverse mapping of that in ParScatter:
! parcel I is stored contiguously in RecvBuf but lands in Slice
! at the (possibly scattered) tag ranges recorded for rank I-1.
!
          DO J = 1, SIZE( Decomp%HEAD(I)%StartTags )
            DO K = Decomp%HEAD(I)%StartTags(J),Decomp%HEAD(I)%EndTags(J)
              L = L + 1
              Slice(K) = RecvBuf(L)
#if defined(DEBUG_PARGATHERREAL4)
                PRINT *, " Entry ", L, RecvBuf(L), K, SIZE(Slice)
#endif
            ENDDO
          ENDDO
        ENDDO
        DEALLOCATE( RecvBuf )
      ENDIF
      ! Complete the local non-blocking send posted above.
      CALL MPI_WAIT( Req, Stat, Ierror )
!
! The following may be needed on some platforms to avoid an MPI bug.
!
      CALL MPI_BARRIER( InComm, Ierror )
      CPP_LEAVE_PROCEDURE( "PARGATHERREAL4" )
      RETURN
!EOC
      END SUBROUTINE ParGatherReal4
    2834             : !-----------------------------------------------------------------------
    2835             : 
    2836             : 
    2837             : !-----------------------------------------------------------------------
    2838             : !BOP
    2839             : ! !IROUTINE: ParGatherInt --- Gather Slice from all PEs
    2840             : !
    2841             : ! !INTERFACE:  
      SUBROUTINE ParGatherInt ( InComm, Root, Local, Decomp, Slice )

! !USES:
      USE decompmodule, ONLY:  DecompType, Lists
      IMPLICIT NONE

! !INPUT PARAMETERS:
      INTEGER, INTENT( IN )          :: InComm       ! Communicator
      INTEGER, INTENT( IN )          :: Root         ! Root PE
      INTEGER, INTENT( IN )          :: Local(*)     ! Local Slice
      TYPE(DecompType), INTENT( IN ) :: Decomp       ! Decomp information

! !OUTPUT PARAMETERS:
      INTEGER, INTENT( OUT )         :: Slice(*)     ! Global Slice

! !DESCRIPTION:
!     Given a decomposition of the domain and a local portion of the
!     total slice on each PE, gather together the portions into a
!     global slice on the root PE.
!
!     Integer variant of ParGatherReal4: every rank -- including the
!     root itself -- posts one non-blocking send of its
!     Decomp%NumEntries(rank+1) values; the root receives the parcels
!     in rank order and scatters them into Slice via the
!     StartTags/EndTags ranges in Decomp%HEAD.  On non-root PEs
!     Slice is not written.
!
! !SYSTEM ROUTINES:
!     MPI_ISEND, MPI_RECV, MPI_COMM_RANK
!
! !REVISION HISTORY:
!   97.04.14   Sawyer     Creation
!   97.04.16   Sawyer     Cleaned up for walk-through
!   97.05.01   Sawyer     Use Decomp%Comm for all local info
!   97.05.18   Sawyer     DecompType has moved to ParUtilitiesTypes
!   97.05.29   Sawyer     Changed 2-D arrays to 1-D
!   97.07.03   Sawyer     Reformulated documentation
!   97.07.22   Sawyer     DecompType has moved to DecompModule
!   97.12.01   Sawyer     Changed MPI_SSEND to MPI_ISEND
!   97.12.05   Sawyer     Added InComm and Root as arguments
!   97.12.05   Sawyer     Added logic to support intercommunicators
!   98.01.24   Sawyer     Removed dependence on MPI derived types TESTED
!   98.01.29   Sawyer     Corrected assertions
!   98.02.05   Sawyer     Removed the use of intercommunicators
!   98.03.31   Sawyer     Stat dimension corrected: MPI_STATUS_SIZE
!   98.04.22   Sawyer     Local no longer assumed shape: Local(*)
!   99.01.19   Sawyer     Dropped assumed-size arrays
!   00.07.07   Sawyer     Removed "1D" references
!   00.07.23   Sawyer     Implementation with shared memory arenas
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
      INTEGER Ierror, I, J, K, L, Iam, GroupSize, Req
      INTEGER Status( MPI_STATUS_SIZE ), Stat( MPI_STATUS_SIZE )
      INTEGER, ALLOCATABLE    :: RecvBuf(:)
!
      CPP_ENTER_PROCEDURE( "PARGATHERINT" )
!
      CALL MPI_COMM_RANK( InComm, Iam, Ierror )
      CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
!
! All PEs send their contribution to the root (the root included;
! its matching receive is posted below).  Tag Iam+3001 matches the
! I+3000 used at the receive end, since rank Iam is loop index
! I = Iam+1 there.
!
      CALL MPI_ISEND( Local, Decomp%NumEntries(Iam+1), CPP_MPI_INTEGER,       &
                      Root, Iam+3001, InComm, Req, Ierror )

      IF ( Iam .EQ. Root ) THEN
        ALLOCATE( RecvBuf( SUM( Decomp%NumEntries ) ) )
!
! On the Root PE receive from every other PE
!
! L is a running offset into RecvBuf spanning all ranks' parcels.
!
        L = 0
        DO I = 1, GroupSize
!
! This is a blocking, synchronous recv.  All the
! sends should have been posted so it should not deadlock
!
! WARNING: F90-MPI inconsistency: make sure the indexing below always works
!
          CPP_ASSERT_F90( L .LT. SIZE( RecvBuf ) )
          CALL MPI_RECV( RecvBuf(L+1), Decomp%NumEntries(I),                  &
                         CPP_MPI_INTEGER, I-1, I+3000, InComm,                &
                         Status, Ierror )
!
! This is the simple reverse mapping of that in ParScatter:
! parcel I is contiguous in RecvBuf but lands in Slice at the
! tag ranges recorded for rank I-1.
!
          DO J = 1, SIZE( Decomp%HEAD(I)%StartTags )
            DO K = Decomp%HEAD(I)%StartTags(J),Decomp%HEAD(I)%EndTags(J)
              L = L + 1
              Slice(K) = RecvBuf(L)
#if defined(DEBUG_PARGATHERINT)
                PRINT *, " Entry ", L, RecvBuf(L), K, SIZE(Slice)
#endif
            ENDDO
          ENDDO
        ENDDO
        DEALLOCATE( RecvBuf )
      ENDIF
      ! Complete the local non-blocking send posted above.
      CALL MPI_WAIT( Req, Stat, Ierror )
!
! The following may be needed on some platforms to avoid an MPI bug.
!
      CALL MPI_BARRIER( InComm, Ierror )

      CPP_LEAVE_PROCEDURE( "PARGATHERINT" )
      RETURN
!EOC
      END SUBROUTINE ParGatherInt
    2945             : !-----------------------------------------------------------------------
    2946             : 
    2947             : 
    2948             : !-----------------------------------------------------------------------
    2949             : !BOP
    2950             : ! !IROUTINE: ParBeginTransferReal --- Start an ASYNC Real Transfer
    2951             : !
    2952             : ! !INTERFACE:
      SUBROUTINE ParBeginTransferReal(InComm, NrInPackets, NrOutPackets, &
                                      Dest, Src, InBuf, InIA,            &
                                      OutBuf, OutIA )

! !USES:
      IMPLICIT NONE

! !INPUT PARAMETERS:
      INTEGER, INTENT( IN )       :: InComm       ! Communicator
      INTEGER, INTENT( IN )       :: NrInPackets  ! Number of in packets
      INTEGER, INTENT( IN )       :: NrOutPackets ! Number of out packets
      INTEGER, INTENT( IN )       :: Dest(:)      ! PE destinations
      INTEGER, INTENT( IN )       :: Src(:)       ! PE sources
      REAL(CPP_REAL8), INTENT(IN) :: InBuf(:)     ! Input buffer
      INTEGER, INTENT( IN )       :: InIA(:)      ! In packet counter
      INTEGER, INTENT( IN )       :: OutIA(:)     ! Out packet counter

! !OUTPUT PARAMETERS:
      REAL(CPP_REAL8), INTENT( OUT ) :: OutBuf(:)  ! Output buffer

! !DESCRIPTION: 
!
!     This routine initiates an async. transfer of an array InBuf
!     partitioned into parcels defined by the arrays InIA and Dest
!     to an output array OutBuf on another PE. InIA(1) contains 
!     the number of reals to be sent to Dest(1), InIA(2) the number 
!     of reals to be sent to Dest(2), etc.  Similarly, the array
!     OutBuf on the calling PE is partitioned into parcels by OutIA
!     and Src, with OutIA(1) the number of reals anticipated from
!     Src(1), etc.  
!
!     The default implementation reads through the contiguous array 
!     InBuf and sends the parcels to the PEs designated with an 
!     asyncronous MPI\_ISEND.  Correspondingly it posts the receives 
!     with an asynchronous MPI\_IRECV.
!
!     Wait handles InHandle(:) and OutHandle(:) are in common block.
!
! !BUGS:
!
!     It is assumed that the buffers are passed to this routine by
!     reference!!!!!!!!!!
!
!     The buffers may not be accessed until after the call to 
!     ParEndTransferReal.
!
!
! !SYSTEM ROUTINES:
!     MPI_COMM_RANK, MPI_ISEND, MPI_IRECV
!
! !REVISION HISTORY:
!   97.09.26   Sawyer     Creation
!   97.12.05   Sawyer     Renamed Comm to InComm to avoid collisions
!   98.02.26   Sawyer     Added Dest, Src and Remote to clean up code
!   98.04.16   Sawyer     Number of packets become input arguments
!   98.09.04   Sawyer     Cleaned interface: handles in common, no Remote
!   99.03.04   Sawyer     Inlined ParCalculateRemote
!   99.06.01   Sawyer     Changed pointer arrays to INTEGER*8 for SGI
!   00.08.07   Sawyer     Implementation with shared memory arenas
!   01.09.27   Sawyer     Added multiple shared buffers for USE_MLP
!
!EOP
!-----------------------------------------------------------------------
!BOC

! !LOCAL VARIABLES:
      INTEGER Iam, GroupSize, Nr, Icnt, Packet, I, Ierr

      CPP_ENTER_PROCEDURE( "PARBEGINTRANSFERREAL" )
      CPP_ASSERT_F90( NrInPackets .LE. SIZE( Dest ) )
      CPP_ASSERT_F90( NrInPackets .LE. SIZE( InIA ) )
      CPP_ASSERT_F90( NrOutPackets .LE. SIZE( Src ) )
      CPP_ASSERT_F90( NrOutPackets .LE. SIZE( OutIA ) )

!
! Increment the ongoing transfer number.  BegTrf cycles through
! 1..MAX_TRF, selecting the slot of the module-level request arrays
! InHandle/OutHandle used for this transfer, so several transfers
! can be in flight at once.
      BegTrf = MOD(BegTrf,MAX_TRF) + 1

      CALL MPI_COMM_RANK( InComm, Iam, Ierr )
      CALL MPI_COMM_SIZE( InComm, GroupSize, Ierr )

!
!     MPI: Irecv over all processes.  Receives are posted before the
!     sends below.  Icnt tracks the running offset of each parcel in
!     the contiguous OutBuf.
!
      Icnt = 1
      DO Packet = 1, NrOutPackets
        Nr = OutIA( Packet )
        IF ( Nr .GT. 0 ) THEN
#if defined( DEBUG_PARBEGINTRANSFERREAL )
          PRINT *, "Iam ",Iam," posts recv ",Nr," from ", Src( Packet )
#endif
!
! Receive the buffers with MPI_Irecv. Non-blocking.
! The message tag is the sender's rank, matching the Iam tag used
! in the MPI_ISEND below.
!
          CPP_ASSERT_F90( Icnt+Nr-1 .LE. SIZE( OutBuf ) )
          CALL MPI_IRECV( OutBuf( Icnt ), Nr,                            &
                CPP_MPI_REAL8, Src( Packet ), Src( Packet ),             &
                InComm, OutHandle(Packet,1,BegTrf), Ierr )
        ELSE
          ! Empty parcel: record a null request so a later blanket
          ! wait over OutHandle completes trivially for this packet.
          OutHandle(Packet,1,BegTrf) = MPI_REQUEST_NULL
        END IF
        Icnt = Icnt + Nr
      END DO
!
!     MPI: Isend over all processes.  Icnt now tracks parcel offsets
!     in the contiguous InBuf.
!
      Icnt = 1
      CPP_ASSERT_F90( NrInPackets .LE. SIZE( Dest ) )
      CPP_ASSERT_F90( NrInPackets .LE. SIZE( InIA ) )
      DO Packet = 1, NrInPackets
        Nr = InIA( Packet )
        IF ( Nr .GT. 0 ) THEN
#if defined( DEBUG_PARBEGINTRANSFERREAL )
          PRINT *,"Iam ",Iam," posts send ",Nr," to ",Dest( Packet )
#endif
!
!     Send the individual buffers with non-blocking sends,
!     tagged with this PE's rank (see matching MPI_IRECV above).
!
          CPP_ASSERT_F90( Icnt+Nr-1 .LE. SIZE( InBuf ) )
          CALL MPI_ISEND ( InBuf( Icnt ), Nr,                            &
                CPP_MPI_REAL8, Dest( Packet ), Iam,                      &
                InComm, InHandle(Packet,1,BegTrf), Ierr )
        ELSE
          ! Empty parcel: null request, as for the receives above.
          InHandle(Packet,1,BegTrf) = MPI_REQUEST_NULL
        END IF
        Icnt = Icnt + Nr
      END DO
!
!
      CPP_LEAVE_PROCEDURE( "PARBEGINTRANSFERREAL" )
      RETURN
!EOC
      END SUBROUTINE ParBeginTransferReal
    3086             : !-----------------------------------------------------------------------
    3087             : 
    3088             : 
    3089             : !-----------------------------------------------------------------------
    3090             : !BOP
    3091             : ! !IROUTINE: ParBeginTransferPattern1D --- Start ASYNC Pattern Transfer
    3092             : !
    3093             : ! !INTERFACE:
    3094           0 :       SUBROUTINE ParBeginTransferPattern1D( InComm, Pattern, InBuf, OutBuf )
    3095             : 
    3096             : ! !USES:
    3097             :       USE mod_comm, ONLY : mp_sendirr
    3098             :       IMPLICIT NONE
    3099             : 
    3100             : ! !INPUT PARAMETERS:
    3101             :       INTEGER, INTENT( IN )       :: InComm       ! Communicator
    3102             :       TYPE (ParPatternType), INTENT( IN )  :: Pattern   ! Comm Pattern
    3103             :       REAL(CPP_REAL8), INTENT( IN )        :: InBuf(*)  ! Input buffer
    3104             : 
    3105             : ! !OUTPUT PARAMETERS:
    3106             :       REAL(CPP_REAL8), INTENT( OUT )       :: OutBuf(*) ! Output buffer
    3107             : 
    3108             : ! !DESCRIPTION: 
    3109             : !
    3110             : !     This routine initiates an async. transfer of an array InBuf.
    3111             : !     The communication pattern indicates the indices outgoing 
    3112             : !     values of InBuf and  incoming values for OutBuf.  This routine
    3113             : !     is fundamentally equivalent to ParBeginTransferReal; the use 
    3114             : !     of a communication pattern is largely a performance enhancement, 
    3115             : !     since it eliminates the need for intermediate buffering.
    3116             : !     
    3117             : !     Wait handles InHandle and OutHandle are module variables
    3118             : !     The buffers may not be accessed until after the call to 
    3119             : !     ParEndTransferReal.  
    3120             : !
    3121             : ! !BUGS:
    3122             : !
    3123             : !     It is assumed that the buffers are passed to this routine by
    3124             : !     reference.
    3125             : !
    3126             : ! !REVISION HISTORY:
    3127             : !   01.02.14   Sawyer     Creation from ParBeginTransferReal
    3128             : !   01.09.27   Sawyer     Added multiple shared buffers for USE_MLP
    3129             : !   02.08.13   Sawyer     Now uses mod_comm unless Use_Mpi_Types
    3130             : !   03.06.24   Sawyer     All complexity now in mp_sendirr
    3131             : !
    3132             : !EOP
    3133             : !-----------------------------------------------------------------------
    3134             : !BOC
    3135             : 
    3136             : ! !LOCAL VARIABLES:
    3137             :       CPP_ENTER_PROCEDURE( "PARBEGINTRANSFERPATTERN1D" )
    3138             : 
    3139           0 :       CALL mp_sendirr( InComm,Pattern%SendDesc,Pattern%RecvDesc,InBuf,OutBuf )
    3140             : !
    3141             :       CPP_LEAVE_PROCEDURE( "PARBEGINTRANSFERPATTERN1D" )
    3142           0 :       RETURN
    3143             : !EOC
    3144             :       END SUBROUTINE ParBeginTransferPattern1D
    3145             : !-----------------------------------------------------------------------
    3146             : 
    3147             : 
    3148             : !-----------------------------------------------------------------------
    3149             : !BOP
    3150             : ! !IROUTINE: ParBeginTransferPattern1Dint --- Start ASYNC Pattern Transfer
    3151             : !
    3152             : ! !INTERFACE:
    3153           0 :       SUBROUTINE ParBeginTransferPattern1Dint( InComm, Pattern, InBuf, OutBuf )
    3154             : 
    3155             : ! !USES:
    3156             :       USE mod_comm, ONLY : mp_sendirr_i4
    3157             :       IMPLICIT NONE
    3158             : 
    3159             : ! !INPUT PARAMETERS:
    3160             :       INTEGER, INTENT( IN )       :: InComm       ! Communicator
    3161             :       TYPE (ParPatternType), INTENT( IN )  :: Pattern   ! Comm Pattern
    3162             :       INTEGER, INTENT( IN )                :: InBuf(*)  ! Input buffer
    3163             : 
    3164             : ! !OUTPUT PARAMETERS:
    3165             :       INTEGER, INTENT( OUT )               :: OutBuf(*) ! Output buffer
    3166             : 
    3167             : ! !DESCRIPTION: 
    3168             : !
    3169             : !     This routine initiates an async. transfer of an array InBuf.
    3170             : !     The communication pattern indicates the indices outgoing 
    3171             : !     values of InBuf and  incoming values for OutBuf.  This routine
    3172             : !     is fundamentally equivalent to ParBeginTransferReal; the use 
    3173             : !     of a communication pattern is largely a performance enhancement, 
    3174             : !     since it eliminates the need for intermediate buffering.
    3175             : !     
    3176             : !     Wait handles InHandle and OutHandle are module variables
    3177             : !     The buffers may not be accessed until after the call to 
    3178             : !     ParEndTransferReal.  
    3179             : !
    3180             : ! !BUGS:
    3181             : !
    3182             : !     It is assumed that the buffers are passed to this routine by
    3183             : !     reference.
    3184             : !
    3185             : ! !REVISION HISTORY:
    3186             : !   01.02.14   Sawyer     Creation from ParBeginTransferReal
    3187             : !   01.09.27   Sawyer     Added multiple shared buffers for USE_MLP
    3188             : !   02.08.13   Sawyer     Now uses mod_comm unless Use_Mpi_Types
    3189             : !   03.06.24   Sawyer     All complexity now in mp_sendirr_i4
    3190             : ! 
    3191             : !EOP
    3192             : !-----------------------------------------------------------------------
    3193             : !BOC
    3194             : 
    3195             :       CPP_ENTER_PROCEDURE( "PARBEGINTRANSFERPATTERN1DINT" )
    3196             : 
    3197           0 :       CALL mp_sendirr_i4( InComm,Pattern%SendDesc,Pattern%RecvDesc,InBuf,OutBuf )
    3198             : 
    3199             :       CPP_LEAVE_PROCEDURE( "PARBEGINTRANSFERPATTERN1DINT" )
    3200           0 :       RETURN
    3201             : !EOC
    3202             :       END SUBROUTINE ParBeginTransferPattern1Dint
    3203             : !-----------------------------------------------------------------------
    3204             : 
    3205             : 
    3206             : !-----------------------------------------------------------------------
    3207             : !BOP
    3208             : ! !IROUTINE: ParBeginTransferPattern2D --- Start an ASYNC Pattern Transfer
    3209             : !
    3210             : ! !INTERFACE:
    3211           0 :       SUBROUTINE ParBeginTransferPattern2D( InComm, Pattern, InBuf, OutBuf )
    3212             : 
    3213             : ! !USES:
    3214             :       USE mod_comm, ONLY : mp_sendirr
    3215             :       IMPLICIT NONE
    3216             : 
    3217             : ! !INPUT PARAMETERS:
    3218             :       INTEGER, INTENT( IN )       :: InComm       ! Communicator
    3219             :       TYPE (ParPatternType), INTENT(IN)  :: Pattern      ! Comm Pattern
    3220             :       REAL(CPP_REAL8), INTENT(IN)        :: InBuf(:,:)   ! Input buffer
    3221             : 
    3222             : ! !OUTPUT PARAMETERS:
    3223             :       REAL(CPP_REAL8), INTENT(OUT)       :: OutBuf(:,:)  ! Output buffer
    3224             : 
    3225             : ! !DESCRIPTION: 
    3226             : !
    3227             : !     This routine initiates an async. transfer of an array InBuf.
    3228             : !     The communication pattern indicates the indices outgoing 
    3229             : !     values of InBuf and  incoming values for OutBuf.  This routine
    3230             : !     is fundamentally equivalent to ParBeginTransferReal; the use 
    3231             : !     of a communication pattern is largely a performance enhancement, 
    3232             : !     since it eliminates the need for intermediate buffering.
    3233             : !
    3234             : !     Wait handles InHandle and OutHandle are module variables
    3235             : !     The buffers may not be accessed until after the call to 
    3236             : !     ParEndTransferReal.  
    3237             : !
    3238             : ! !REVISION HISTORY:
    3239             : !   01.10.01   Sawyer     Creation from ParBeginTransferPattern
    3240             : !   02.08.13   Sawyer     Now uses mod_comm unless Use_Mpi_Types
    3241             : !   03.06.24   Sawyer     All complexity now in mp_sendirr
    3242             : ! 
    3243             : !EOP
    3244             : !-----------------------------------------------------------------------
    3245             : !BOC
    3246             : 
    3247             :       CPP_ENTER_PROCEDURE( "PARBEGINTRANSFERPATTERN2D" )
    3248             : 
    3249           0 :       CALL mp_sendirr( InComm,Pattern%SendDesc,Pattern%RecvDesc,InBuf,OutBuf )
    3250             : 
    3251             :       CPP_LEAVE_PROCEDURE( "PARBEGINTRANSFERPATTERN2D" )
    3252           0 :       RETURN
    3253             : !EOC
    3254             :       END SUBROUTINE ParBeginTransferPattern2D
    3255             : !-----------------------------------------------------------------------
    3256             : 
    3257             : 
    3258             : !-----------------------------------------------------------------------
    3259             : !BOP
    3260             : ! !IROUTINE: ParBeginTransferPattern3D --- Start an ASYNC Pattern Transfer
    3261             : !
    3262             : ! !INTERFACE:
    3263           0 :       SUBROUTINE ParBeginTransferPattern3D( InComm, Pattern, InBuf, OutBuf )
    3264             : 
    3265             : ! !USES:
    3266             :       USE mod_comm, ONLY : mp_sendirr
    3267             :       IMPLICIT NONE
    3268             : 
    3269             : ! !INPUT PARAMETERS:
    3270             :       INTEGER, INTENT( IN )       :: InComm       ! Communicator
    3271             :       TYPE (ParPatternType), INTENT(IN)  :: Pattern      ! Comm Pattern
    3272             :       REAL(CPP_REAL8), INTENT(IN)        :: InBuf(:,:,:) ! Input buffer
    3273             : 
    3274             : ! !OUTPUT PARAMETERS:
    3275             :       REAL(CPP_REAL8), INTENT(OUT)       :: OutBuf(:,:,:)! Output buffer
    3276             : 
    3277             : ! !DESCRIPTION: 
    3278             : !
    3279             : !     This routine initiates an async. transfer of an array InBuf.
    3280             : !     The communication pattern indicates the indices outgoing 
    3281             : !     values of InBuf and  incoming values for OutBuf.  This routine
    3282             : !     is fundamentally equivalent to ParBeginTransferReal; the use 
    3283             : !     of a communication pattern is largely a performance enhancement, 
    3284             : !     since it eliminates the need for intermediate buffering.
    3285             : !
    3286             : !     Wait handles InHandle and OutHandle are module variables
    3287             : !     The buffers may not be accessed until after the call to 
    3288             : !     ParEndTransferReal.  
    3289             : !
    3290             : ! !REVISION HISTORY:
    3291             : !   01.10.01   Sawyer     Creation from ParBeginTransferPattern
    3292             : !   02.08.13   Sawyer     Now uses mod_comm unless Use_Mpi_Types
    3293             : !   03.06.24   Sawyer     All complexity now in mp_sendirr
    3294             : ! 
    3295             : !EOP
    3296             : !-----------------------------------------------------------------------
    3297             : !BOC
    3298             : 
    3299             :       CPP_ENTER_PROCEDURE( "PARBEGINTRANSFERPATTERN3D" )
    3300             : 
    3301           0 :       CALL mp_sendirr( InComm,Pattern%SendDesc,Pattern%RecvDesc,InBuf,OutBuf )
    3302             : 
    3303             :       CPP_LEAVE_PROCEDURE( "PARBEGINTRANSFERPATTERN3D" )
    3304           0 :       RETURN
    3305             : !EOC
    3306             :       END SUBROUTINE ParBeginTransferPattern3D
    3307             : !-----------------------------------------------------------------------
    3308             : 
    3309             : 
    3310             : !-----------------------------------------------------------------------
    3311             : !BOP
    3312             : ! !IROUTINE: ParBeginTransferPattern4D --- Start an ASYNC Pattern Transfer
    3313             : !
    3314             : ! !INTERFACE:
    3315           0 :       SUBROUTINE ParBeginTransferPattern4D( InComm, Pattern, InBuf, OutBuf )
    3316             : 
    3317             : ! !USES:
    3318             :       USE mod_comm, ONLY : mp_sendirr
    3319             :       IMPLICIT NONE
    3320             : 
    3321             : ! !INPUT PARAMETERS:
    3322             :       INTEGER, INTENT( IN )       :: InComm       ! Communicator
    3323             :       TYPE (ParPatternType), INTENT(IN)  :: Pattern        ! Comm Pattern
    3324             :       REAL(CPP_REAL8), INTENT(IN)        :: InBuf(:,:,:,:) ! Input buffer
    3325             : 
    3326             : ! !OUTPUT PARAMETERS:
    3327             :       REAL(CPP_REAL8), INTENT(OUT)       :: OutBuf(:,:,:,:)! Output buffer
    3328             : 
    3329             : ! !DESCRIPTION: 
    3330             : !
    3331             : !     This routine initiates an async. transfer of an array InBuf.
    3332             : !     The communication pattern indicates the indices outgoing 
    3333             : !     values of InBuf and  incoming values for OutBuf.  This routine
    3334             : !     is fundamentally equivalent to ParBeginTransferReal; the use 
    3335             : !     of a communication pattern is largely a performance enhancement, 
    3336             : !     since it eliminates the need for intermediate buffering.
    3337             : !
    3338             : !     Wait handles InHandle and OutHandle are module variables
    3339             : !     The buffers may not be accessed until after the call to 
    3340             : !     ParEndTransferReal.  
    3341             : !
    3342             : ! !REVISION HISTORY:
    3343             : !   02.12.19   Sawyer     Creation from ParBeginTransferPattern
    3344             : !   03.06.24   Sawyer     All complexity now in mp_sendirr
    3345             : ! 
    3346             : !EOP
    3347             : !-----------------------------------------------------------------------
    3348             : !BOC
    3349             : 
    3350             :       CPP_ENTER_PROCEDURE( "PARBEGINTRANSFERPATTERN4D" )
    3351             : 
    3352           0 :       CALL mp_sendirr( InComm,Pattern%SendDesc,Pattern%RecvDesc,InBuf,OutBuf )
    3353             : 
    3354             :       CPP_LEAVE_PROCEDURE( "PARBEGINTRANSFERPATTERN4D" )
    3355           0 :       RETURN
    3356             : !EOC
    3357             :       END SUBROUTINE ParBeginTransferPattern4D
    3358             : !-----------------------------------------------------------------------
    3359             : 
    3360             : 
    3361             : !-----------------------------------------------------------------------
    3362             : !BOP
    3363             : ! !IROUTINE: ParEndTransferReal --- Complete an ASYNC Real Transfer
    3364             : !
    3365             : ! !INTERFACE:
    3366           0 :       SUBROUTINE ParEndTransferReal( InComm, NrInPackets, NrOutPackets,  &
    3367             :                                      Dest, Src, InBuf, InIA,             &
    3368             :                                      OutBuf, OutIA )
    3369             : 
    3370             : ! !USES:
    3371             :       IMPLICIT NONE
    3372             : 
    3373             : ! !INPUT PARAMETERS:
    3374             :       INTEGER, INTENT( IN )       :: InComm       ! Communicator
    3375             :       INTEGER, INTENT( IN )       :: NrInPackets  ! Number of in packets
    3376             :       INTEGER, INTENT( IN )       :: NrOutPackets ! Number of out packets
    3377             :       INTEGER, INTENT( IN )       :: Dest(:)      ! PE destinations
    3378             :       INTEGER, INTENT( IN )       :: Src(:)       ! PE sources
    3379             :       REAL(CPP_REAL8), INTENT(IN) :: InBuf(:)     ! Input buffer
    3380             :       INTEGER, INTENT( IN )       :: InIA(:)      ! Pointer array
    3381             :       INTEGER, INTENT( IN )       :: OutIA(:)     ! Pointer array
    3382             : 
    3383             : ! !INPUT/OUTPUT PARAMETERS:
    3384             :       REAL(CPP_REAL8), INTENT( INOUT ) :: OutBuf(:)! Output buffer
    3385             : 
    3386             : ! !DESCRIPTION: 
    3387             : !
    3388             : !     This routine completes an async. transfer of an array
    3389             : !     partitioned into parcels defined by the array InIA.  In the 
    3390             : !     MPI version, neither InBuf nor OutBuf is not used since
    3391             : !     that information was utilized in ParBeginTransferReal.
    3392             : !
    3393             : !     The link between StartTransfer and EndTransfer is made possible
    3394             : !     by the InHandle and OutHandle: they reflect the status of
    3395             : !     the ongoing transfer.  When this routine completes, a valid
    3396             : !     and accessible copy of the OutBuf is ready for use.
    3397             : !
    3398             : ! !BUGS:
    3399             : !
    3400             : !     It is assumed that the buffers are passed to this routine by
    3401             : !     reference! The buffers may not be accessed until after the 
    3402             : !     completion of ParEndTransferReal.  
    3403             : !
    3404             : !
    3405             : ! !SYSTEM ROUTINES:
    3406             : !     MPI_COMM_RANK, MPI_ISEND, MPI_IRECV
    3407             : !
    3408             : ! !REVISION HISTORY:
    3409             : !   97.09.26   Sawyer     Creation
    3410             : !   97.12.05   Sawyer     Renamed Comm to InComm to avoid collisions
    3411             : !   98.02.26   Sawyer     Count through packets, not PEs
    3412             : !   98.04.16   Sawyer     Number of packets become input arguments
    3413             : !   98.09.04   Sawyer     Cleaned interface: handles in common
    3414             : !   99.03.05   Sawyer     Support for contiguous communicators in SHMEM
    3415             : !   99.04.22   Sawyer     Bug fix: replaced MPI_WAIT with MPI_WAITALL
    3416             : !   99.06.03   Sawyer     Bug fix: GroupSize in SHMEM_BARRIER
    3417             : !   00.07.28   Sawyer     Implemented with shared memory arenas
    3418             : !   01.09.27   Sawyer     Added multiple shared buffers for USE_MLP
    3419             : !
    3420             : !EOP
    3421             : !-----------------------------------------------------------------------
    3422             : !BOC
    3423             : ! !LOCAL VARIABLES:
    3424             :       INTEGER Iam, GroupSize, J, Offset, Packet, Ierr
    3425           0 :       INTEGER InStats(NrInPackets*MPI_STATUS_SIZE)
    3426           0 :       INTEGER OutStats(NrOutPackets*MPI_STATUS_SIZE)
    3427             : 
    3428             :       CPP_ENTER_PROCEDURE( "PARENDTRANSFERREAL" )
    3429             : 
    3430             : !
    3431             : ! Increment the receiver 
    3432           0 :       EndTrf = MOD(EndTrf,MAX_TRF)+1
    3433             : 
    3434             :       CPP_ASSERT_F90( NrInPackets .LE. MAX_PAX )
    3435           0 :       CALL MPI_WAITALL( NrInPackets, InHandle(:,1,EndTrf), InStats, Ierr )
    3436             :  
    3437             :       CPP_ASSERT_F90( NrOutPackets .LE. MAX_PAX )
    3438           0 :       CALL MPI_WAITALL( NrOutPackets, OutHandle(:,1,EndTrf), OutStats, Ierr )
    3439             : !
    3440             : ! WS 98.09.22 : This barrier needed to synchronize.
    3441             : !
    3442           0 :       CALL MPI_BARRIER( InComm, Ierr )
    3443             : 
    3444             :       CPP_LEAVE_PROCEDURE( "PARENDTRANSFERREAL" )
    3445           0 :       RETURN
    3446             : !EOC
    3447             :       END SUBROUTINE ParEndTransferReal
    3448             : !-----------------------------------------------------------------------
    3449             : 
    3450             : 
    3451             : !-----------------------------------------------------------------------
    3452             : !BOP
    3453             : ! !IROUTINE: ParEndTransferPattern1D --- Complete ASYNC Pattern Transfer
    3454             : !
    3455             : ! !INTERFACE:
    3456           0 :       SUBROUTINE ParEndTransferPattern1D( InComm, Pattern, InBuf, OutBuf )
    3457             : 
    3458             : ! !USES:
    3459             :       USE mod_comm, ONLY : mp_recvirr
    3460             :       IMPLICIT NONE
    3461             : 
    3462             : ! !INPUT PARAMETERS:
    3463             :       INTEGER, INTENT( IN )       :: InComm       ! Communicator
    3464             :       TYPE (ParPatternType), INTENT( IN )  :: Pattern   ! Comm Pattern
    3465             :       REAL(CPP_REAL8), INTENT( IN )        :: InBuf(*)  ! Input buffer
    3466             : 
    3467             : ! !INPUT/OUTPUT PARAMETERS:
    3468             :       REAL(CPP_REAL8), INTENT( INOUT )     :: OutBuf(*) ! Output buffer
    3469             : 
    3470             : ! !DESCRIPTION: 
    3471             : !
    3472             : !     This routine completes an async. transfer of an array communicated
    3473             : !     with a communication pattern.  
    3474             : !
    3475             : !     The link between StartTransfer and EndTransfer is made possible
    3476             : !     by the InHandle and OutHandle: they reflect the status of
    3477             : !     the ongoing transfer.  When this routine completes, a valid
    3478             : !     and accessible copy of the OutBuf is ready for use.
    3479             : !     The buffers may not be accessed until after the 
    3480             : !     completion of ParEndTransfer.  
    3481             : !
    3482             : ! !BUGS:
    3483             : !
    3484             : !     It is assumed that the buffers are passed to this routine by
    3485             : !     reference.
    3486             : !
    3487             : ! !REVISION HISTORY:
    3488             : !   01.02.14   Sawyer     Creation from ParEndTransferReal
    3489             : !   02.08.13   Sawyer     Now uses mod_comm unless Use_Mpi_Types
    3490             : !   03.06.24   Sawyer     All complexity now in mp_recvirr
    3491             : !
    3492             : !EOP
    3493             : !-----------------------------------------------------------------------
    3494             : !BOC
    3495             : 
    3496             :       CPP_ENTER_PROCEDURE( "PARENDTRANSFERPATTERN1D" )
    3497             : 
    3498           0 :       CALL mp_recvirr( InComm,Pattern%SendDesc,Pattern%RecvDesc,InBuf,OutBuf )
    3499             : 
    3500             :       CPP_LEAVE_PROCEDURE( "PARENDTRANSFERPATTERN1D" )
    3501           0 :       RETURN
    3502             : !EOC
    3503             :       END SUBROUTINE ParEndTransferPattern1D
    3504             : !-----------------------------------------------------------------------
    3505             : 
    3506             : 
    3507             : !-----------------------------------------------------------------------
    3508             : !BOP
    3509             : ! !IROUTINE: ParEndTransferPattern1Dint --- Complete ASYNC Pattern Transfer
    3510             : !
    3511             : ! !INTERFACE:
    3512           0 :       SUBROUTINE ParEndTransferPattern1Dint( InComm, Pattern, InBuf, OutBuf )
    3513             : 
    3514             : ! !USES:
    3515             :       USE mod_comm, ONLY : mp_recvirr_i4
    3516             :       IMPLICIT NONE
    3517             : 
    3518             : ! !INPUT PARAMETERS:
    3519             :       INTEGER, INTENT( IN )       :: InComm       ! Communicator
    3520             :       TYPE (ParPatternType), INTENT( IN )  :: Pattern   ! Comm Pattern
    3521             :       INTEGER, INTENT( IN )                :: InBuf(*)  ! Input buffer
    3522             : 
    3523             : ! !INPUT/OUTPUT PARAMETERS:
    3524             :       INTEGER, INTENT( INOUT )             :: OutBuf(*) ! Output buffer
    3525             : 
    3526             : ! !DESCRIPTION: 
    3527             : !
    3528             : !     This routine completes an async. transfer of an array communicated
    3529             : !     with a communication pattern.  
    3530             : !
    3531             : !     The link between StartTransfer and EndTransfer is made possible
    3532             : !     by the InHandle and OutHandle: they reflect the status of
    3533             : !     the ongoing transfer.  When this routine completes, a valid
    3534             : !     and accessible copy of the OutBuf is ready for use.
    3535             : !     The buffers may not be accessed until after the 
    3536             : !     completion of ParEndTransfer.  
    3537             : !
    3538             : ! !BUGS:
    3539             : !
    3540             : !     It is assumed that the buffers are passed to this routine by
    3541             : !     reference.
    3542             : !
    3543             : ! !REVISION HISTORY:
    3544             : !   01.02.14   Sawyer     Creation from ParEndTransferReal
    3545             : !   02.08.13   Sawyer     Now uses mod_comm unless Use_Mpi_Types
    3546             : !   03.06.24   Sawyer     All complexity now in mp_recvirr_i4
    3547             : !
    3548             : !EOP
    3549             : !-----------------------------------------------------------------------
    3550             : !BOC
    3551             : 
    3552             :       CPP_ENTER_PROCEDURE( "PARENDTRANSFERPATTERN1DINT" )
    3553             : 
    3554           0 :       CALL mp_recvirr_i4( InComm,Pattern%SendDesc,Pattern%RecvDesc,InBuf,OutBuf )
    3555             : 
    3556             :       CPP_LEAVE_PROCEDURE( "PARENDTRANSFERPATTERN1DINT" )
    3557           0 :       RETURN
    3558             : !EOC
    3559             :       END SUBROUTINE ParEndTransferPattern1Dint
    3560             : !-----------------------------------------------------------------------
    3561             : 
    3562             : 
    3563             : !-----------------------------------------------------------------------
    3564             : !BOP
    3565             : ! !IROUTINE: ParEndTransferPattern2D --- Complete an ASYNC Pattern Transfer
    3566             : !
    3567             : ! !INTERFACE:
    3568           0 :       SUBROUTINE ParEndTransferPattern2D( InComm, Pattern, InBuf, OutBuf )
    3569             : 
    3570             : ! !USES:
    3571             :       USE mod_comm, ONLY : mp_recvirr
    3572             :       IMPLICIT NONE
    3573             : 
    3574             : ! !INPUT PARAMETERS:
    3575             :       INTEGER, INTENT( IN )       :: InComm       ! Communicator
    3576             :       TYPE (ParPatternType), INTENT( IN )  :: Pattern       ! Comm Pattern
    3577             :       REAL(CPP_REAL8), INTENT( IN )        :: InBuf(:,:)    ! Input buffer
    3578             : 
    3579             : ! !INPUT/OUTPUT PARAMETERS:
    3580             :       REAL(CPP_REAL8), INTENT( INOUT )     :: OutBuf(:,:)   ! Output buffer
    3581             : 
    3582             : ! !DESCRIPTION: 
    3583             : !
    3584             : !     This routine completes an async. transfer of an array communicated
    3585             : !     with a communication pattern.  
    3586             : !
    3587             : !     The link between StartTransfer and EndTransfer is made possible
    3588             : !     by the InHandle and OutHandle: they reflect the status of
    3589             : !     the ongoing transfer.  When this routine completes, a valid
    3590             : !     and accessible copy of the OutBuf is ready for use.
    3591             : !     The buffers may not be accessed until after the 
    3592             : !     completion of ParEndTransfer.  
    3593             : !
    3594             : ! !BUGS:
    3595             : !
    3596             : !     It is assumed that the buffers are passed to this routine by
    3597             : !     reference.
    3598             : !
    3599             : ! !REVISION HISTORY:
    3600             : !   01.10.01   Sawyer     Creation from ParEndTransferPattern
    3601             : !   02.08.13   Sawyer     Now uses mod_comm unless Use_Mpi_Types
    3602             : !   03.06.24   Sawyer     All complexity now in mp_recvirr
    3603             : !
    3604             : !EOP
    3605             : !-----------------------------------------------------------------------
    3606             : !BOC
    3607             : 
    3608             :       CPP_ENTER_PROCEDURE( "PARENDTRANSFERPATTERN2D" )
    3609             : 
    3610           0 :       CALL mp_recvirr( InComm,Pattern%SendDesc,Pattern%RecvDesc,InBuf(:,:),OutBuf(:,:) )
    3611             : 
    3612             :       CPP_LEAVE_PROCEDURE( "PARENDTRANSFERPATTERN2D" )
    3613           0 :       RETURN
    3614             : !EOC
    3615             :       END SUBROUTINE ParEndTransferPattern2D
    3616             : !-----------------------------------------------------------------------
    3617             : 
    3618             : 
    3619             : !-----------------------------------------------------------------------
    3620             : !BOP
    3621             : ! !IROUTINE: ParEndTransferPattern3D --- Complete an ASYNC Pattern Transfer
    3622             : !
    3623             : ! !INTERFACE:
    3624           0 :       SUBROUTINE ParEndTransferPattern3D( InComm, Pattern, InBuf, OutBuf )
    3625             : 
    3626             : ! !USES:
    3627             :       USE mod_comm, ONLY : mp_recvirr
    3628             :       IMPLICIT NONE
    3629             : 
    3630             : ! !INPUT PARAMETERS:
    3631             :       INTEGER, INTENT( IN )       :: InComm       ! Communicator
    3632             :       TYPE (ParPatternType), INTENT( IN )  :: Pattern       ! Comm Pattern
    3633             :       REAL(CPP_REAL8), INTENT( IN )        :: InBuf(:,:,:)  ! Input buffer
    3634             : 
    3635             : ! !INPUT/OUTPUT PARAMETERS:
    3636             :       REAL(CPP_REAL8), INTENT( INOUT )     :: OutBuf(:,:,:) ! Output buffer
    3637             : 
    3638             : ! !DESCRIPTION: 
    3639             : !
    3640             : !     This routine completes an async. transfer of an array communicated
    3641             : !     with a communication pattern.  
    3642             : !
    3643             : !     The link between StartTransfer and EndTransfer is made possible
    3644             : !     by the InHandle and OutHandle: they reflect the status of
    3645             : !     the ongoing transfer.  When this routine completes, a valid
    3646             : !     and accessible copy of the OutBuf is ready for use.
    3647             : !     The buffers may not be accessed until after the 
    3648             : !     completion of ParEndTransfer.  
    3649             : !
    3650             : ! !BUGS:
    3651             : !
    3652             : !     It is assumed that the buffers are passed to this routine by
    3653             : !     reference.
    3654             : !
    3655             : ! !REVISION HISTORY:
    3656             : !   01.10.01   Sawyer     Creation from ParEndTransferPattern
    3657             : !   02.08.13   Sawyer     Now uses mod_comm unless Use_Mpi_Types
    3658             : !   03.06.24   Sawyer     All complexity now in mp_recvirr
    3659             : !
    3660             : !EOP
    3661             : !-----------------------------------------------------------------------
    3662             : !BOC
    3663             : 
    3664             :       CPP_ENTER_PROCEDURE( "PARENDTRANSFERPATTERN3D" )
    3665             : 
    3666           0 :       CALL mp_recvirr( InComm,Pattern%SendDesc,Pattern%RecvDesc,InBuf(:,:,:),OutBuf(:,:,:) )
    3667             : 
    3668             :       CPP_LEAVE_PROCEDURE( "PARENDTRANSFERPATTERN3D" )
    3669           0 :       RETURN
    3670             : !EOC
    3671             :       END SUBROUTINE ParEndTransferPattern3D
    3672             : !-----------------------------------------------------------------------
    3673             : 
    3674             : 
    3675             : !-----------------------------------------------------------------------
    3676             : !BOP
    3677             : ! !IROUTINE: ParEndTransferPattern4D --- Complete an ASYNC Pattern Transfer
    3678             : !
    3679             : ! !INTERFACE:
    3680           0 :       SUBROUTINE ParEndTransferPattern4D( InComm, Pattern, InBuf, OutBuf )
    3681             : 
    3682             : ! !USES:
    3683             :       USE mod_comm, ONLY : mp_recvirr
    3684             :       IMPLICIT NONE
    3685             : 
    3686             : ! !INPUT PARAMETERS:
    3687             :       INTEGER, INTENT( IN )       :: InComm       ! Communicator
    3688             :       TYPE (ParPatternType), INTENT( IN )  :: Pattern        ! Comm Pattern
    3689             :       REAL(CPP_REAL8), INTENT( IN )        :: InBuf(:,:,:,:) ! Input buffer
    3690             : 
    3691             : ! !INPUT/OUTPUT PARAMETERS:
    3692             :       REAL(CPP_REAL8), INTENT( INOUT )     :: OutBuf(:,:,:,:)! Output buffer
    3693             : 
    3694             : ! !DESCRIPTION: 
    3695             : !
    3696             : !     This routine completes an async. transfer of an array communicated
    3697             : !     with a communication pattern.  
    3698             : !
    3699             : !     The link between StartTransfer and EndTransfer is made possible
    3700             : !     by the InHandle and OutHandle: they reflect the status of
    3701             : !     the ongoing transfer.  When this routine completes, a valid
    3702             : !     and accessible copy of the OutBuf is ready for use.
    3703             : !     The buffers may not be accessed until after the 
    3704             : !     completion of ParEndTransfer.  
    3705             : !
    3706             : ! !BUGS:
    3707             : !
    3708             : !     It is assumed that the buffers are passed to this routine by
    3709             : !     reference.
    3710             : !
    3711             : ! !REVISION HISTORY:
    3712             : !   02.12.19   Sawyer     Creation from ParEndTransferPattern
    3713             : !   03.06.24   Sawyer     All complexity now in mp_recvirr
    3714             : !
    3715             : !EOP
    3716             : !-----------------------------------------------------------------------
    3717             : !BOC
    3718             : 
    3719             :       CPP_ENTER_PROCEDURE( "PARENDTRANSFERPATTERN4D" )
    3720             : 
    3721           0 :       CALL mp_recvirr( InComm,Pattern%SendDesc,Pattern%RecvDesc,InBuf,OutBuf )
    3722             : 
    3723             :       CPP_LEAVE_PROCEDURE( "PARENDTRANSFERPATTERN4D" )
    3724           0 :       RETURN
    3725             : !EOC
    3726             :       END SUBROUTINE ParEndTransferPattern4D
    3727             : !-----------------------------------------------------------------------
    3728             : 
    3729             : 
    3730             : !-----------------------------------------------------------------------
    3731             : !BOP
!BOP
! !IROUTINE: ParExchangeVectorReal --- Exchange a sparse packed vector
!
! !INTERFACE:  
      SUBROUTINE ParExchangeVectorReal ( InComm, LenInVector, InVector,  &
                                         LenOutVector, OutVector )

! !USES:
      IMPLICIT NONE

! !INPUT PARAMETERS:
      INTEGER, INTENT( IN )   :: InComm            ! Communicator
      INTEGER, INTENT( IN )   :: LenInVector( * )  ! Length on each PE
      REAL(CPP_REAL8), INTENT( IN ):: InVector( * ) ! The input buffer

! !OUTPUT PARAMETERS:
      INTEGER, INTENT( OUT )  :: LenOutVector( * ) ! Length on each PE
      REAL(CPP_REAL8), INTENT( OUT ) :: OutVector( * ) ! The output buffer

! !DESCRIPTION:
!
!     This routine exchanges vectors stored in compressed format, i.e.,
!     in so-called compressed sparse row (CSR) format, with other
!     PEs.  In essence it first exchanges the lengths with
!     MPI\_Alltoall, then the exchange of the actual vectors (can be
!     different in size) using MPI\_AlltoallV.  Since the latter is
!     inefficient, it is simulated using MPI\_Isend and MPI\_Recv.
!
! !SYSTEM ROUTINES:
!     MPI_ISEND, MPI_RECV, MPI_WAITALL, MPI_ALLTOALL
!
! !REVISION HISTORY:
!   98.03.17   Sawyer     Creation from F77 version
!   98.03.30   Sawyer     Removed assumed shape arrays due to problems
!   99.01.18   Sawyer     Added barrier for safety
!   99.03.08   Sawyer     USE_SHMEM version for CRAY only; untested
!   99.06.01   Sawyer     USE_SHMEM version revised per comments from Tom
!   00.07.28   Sawyer     Implemented with shared memory arenas
!
!EOP
!-----------------------------------------------------------------------
!BOC
!
! !LOCAL VARIABLES:
! NOTE(review): i, iscnt and ircnt appear unused in this routine --
! candidates for removal; Ierr from the MPI calls is never checked.
      INTEGER :: i, iscnt, ircnt, nr, pe, icnt, Nsize, Iam, Ierr
      INTEGER :: Status(MPI_STATUS_SIZE)
! Reqs holds one request per PE; Stats is the flat status array for
! the MPI_WAITALL on those requests (Nsize statuses of MPI_STATUS_SIZE)
      Integer, allocatable :: Reqs(:), Stats(:)

      CPP_ENTER_PROCEDURE( "PAREXCHANGEVECTORREAL" )

      CALL MPI_COMM_SIZE( InComm, Nsize, Ierr )
      CALL MPI_COMM_RANK( InComm, Iam, Ierr )

      allocate (Reqs(Nsize))
      allocate (Stats(Nsize*MPI_STATUS_SIZE))

! Phase 1: tell every PE how many values it will receive from us.
#if defined( MY_ALLTOALL )
      DO pe = 0, Nsize-1
!
! Send the individual buffers with non-blocking sends
!
        nr = LenInVector( pe + 1 )
        CALL MPI_ISEND( nr, 1, CPP_MPI_INTEGER, pe, Iam+3000,             &
                        InComm, Reqs( pe+1 ), Ierr )
      ENDDO
      DO pe = 0, Nsize - 1
!
! Receive the buffers with MPI_Recv. Now we are blocking.
!
        CALL MPI_RECV( nr, 1, CPP_MPI_INTEGER, pe, pe+3000,               &
                       InComm, Status, Ierr )
        LenOutVector(pe + 1) = nr
      ENDDO
      CALL MPI_WAITALL( Nsize, Reqs, Stats, Ierr )
#else
      CALL MPI_ALLTOALL( LenInVector, 1, CPP_MPI_INTEGER,                 &
                         LenOutVector, 1, CPP_MPI_INTEGER,                &
                         InComm, Ierr )
#endif
!
! Phase 2: exchange the data parcels themselves.  All non-blocking
! sends are posted first, then the matching receives block; icnt walks
! the CSR-packed buffer (1-based offset of the parcel for each PE).
!
! Over all processes
!
      icnt = 1
      DO pe = 0, Nsize-1
!
! Send the individual buffers with non-blocking sends
!
        nr = LenInVector( pe + 1 )
        IF ( nr .gt. 0 ) THEN
          CALL MPI_ISEND( InVector( icnt ), nr,                           &
                          CPP_MPI_REAL8, pe, Iam+2000,                    &
                          InComm, Reqs( pe+1 ), Ierr )
        ELSE
! Empty parcel: no send posted, but a null request keeps the later
! MPI_WAITALL over all Nsize entries valid.
          Reqs( pe+1 ) = MPI_REQUEST_NULL
        ENDIF
        icnt = icnt + nr
      ENDDO

!
! Over all processes
!
      icnt = 1
      DO pe = 0, Nsize - 1
!
! Receive the buffers with MPI_Recv. Now we are blocking. 
!
        nr = LenOutVector(pe + 1)
        IF ( nr .gt. 0 ) THEN
          CALL MPI_RECV( OutVector( icnt ), nr,                          &
                         CPP_MPI_REAL8, pe, pe+2000,                     &
                         InComm, Status, Ierr )
        ENDIF
        icnt = icnt + nr
      ENDDO
! Make sure all of our outgoing parcels have been handed off before
! the send buffer can be reused by the caller.
      CALL MPI_WAITALL( Nsize, Reqs, Stats, Ierr )

      deallocate (Reqs)
      deallocate (Stats)

      CPP_LEAVE_PROCEDURE( "PAREXCHANGEVECTORREAL" )

      RETURN
!EOC
      END SUBROUTINE ParExchangeVectorReal
    3855             : !-----------------------------------------------------------------------
    3856             : 
    3857             : !-----------------------------------------------------------------------
    3858             : !BOP
    3859             : ! !IROUTINE: ParExchangeVectorReal4 --- Exchange a sparse packed vector
    3860             : !
    3861             : ! !INTERFACE:  
      SUBROUTINE ParExchangeVectorReal4 ( InComm, LenInVector, InVector,&
                                          LenOutVector, OutVector )

! !USES:
      IMPLICIT NONE

! !INPUT PARAMETERS:
      INTEGER, INTENT( IN )   :: InComm            ! Communicator
      INTEGER, INTENT( IN )   :: LenInVector( * )  ! Length on each PE
      REAL(CPP_REAL4), INTENT( IN ):: InVector( * ) ! The input buffer

! !OUTPUT PARAMETERS:
      INTEGER, INTENT( OUT )  :: LenOutVector( * ) ! Length on each PE
      REAL(CPP_REAL4), INTENT( OUT ) :: OutVector( * ) ! The output buffer

! !DESCRIPTION:
!
!     This routine exchanges vectors stored in compressed format, i.e.,
!     in so-called compressed sparse row (CSR) format, with other
!     PEs.  In essence it first exchanges the lengths with
!     MPI\_Alltoall, then the exchange of the actual vectors (can be
!     different in size) using MPI\_AlltoallV.  Since the latter is
!     inefficient, it is simulated using MPI\_Isend and MPI\_Recv.
!
!     LenInVector(pe+1) gives the number of REAL4 values destined for
!     rank pe; the payloads are packed back-to-back in InVector in
!     ascending rank order.  On exit LenOutVector/OutVector have the
!     same layout for the received data.
!
! !SYSTEM ROUTINES:
!     MPI_ISEND, MPI_RECV, MPI_WAITALL, MPI_ALLTOALL
!
! !REVISION HISTORY:
!   98.03.17   Sawyer     Creation from F77 version
!   98.03.30   Sawyer     Removed assumed shape arrays due to problems
!   99.01.18   Sawyer     Added barrier for safety
!   99.03.08   Sawyer     USE_SHMEM version for CRAY only; untested
!   99.06.01   Sawyer     USE_SHMEM version revised per comments from Tom
!   00.07.28   Sawyer     Implemented with shared memory arenas
!
!EOP
!-----------------------------------------------------------------------
!BOC
!
! !LOCAL VARIABLES:
      INTEGER :: i, iscnt, ircnt, nr, pe, icnt, Nsize, Iam, Ierr
      INTEGER :: Status(MPI_STATUS_SIZE)
      ! Reqs(pe+1): request handle of the send posted to rank pe.
      ! Stats: flattened (MPI_STATUS_SIZE,Nsize) status array for MPI_WAITALL.
      Integer, allocatable :: Reqs(:), Stats(:)

      CPP_ENTER_PROCEDURE( "PAREXCHANGEVECTORREAL4" )

      CALL MPI_COMM_SIZE( InComm, Nsize, Ierr )
      CALL MPI_COMM_RANK( InComm, Iam, Ierr )

      allocate (Reqs(Nsize))
      allocate (Stats(Nsize*MPI_STATUS_SIZE))

#if defined( MY_ALLTOALL )
      DO pe = 0, Nsize-1
!
! Send the individual buffers with non-blocking sends
!
        nr = LenInVector( pe + 1 )
        CALL MPI_ISEND( nr, 1, CPP_MPI_INTEGER, pe, Iam+3000,             &
                        InComm, Reqs( pe+1 ), Ierr )
      ENDDO
      DO pe = 0, Nsize - 1
!
! Receive the buffers with MPI_Recv. Now we are blocking.
!
        CALL MPI_RECV( nr, 1, CPP_MPI_INTEGER, pe, pe+3000,               &
                       InComm, Status, Ierr )
        LenOutVector(pe + 1) = nr
      ENDDO
      CALL MPI_WAITALL( Nsize, Reqs, Stats, Ierr )
#else
      ! Default path: one all-to-all exchanges the per-PE lengths.
      CALL MPI_ALLTOALL( LenInVector, 1, CPP_MPI_INTEGER,                 &
                         LenOutVector, 1, CPP_MPI_INTEGER,                &
                         InComm, Ierr )
#endif
!
! Over all processes
!
      ! icnt is the 1-based running offset into the packed InVector.
      icnt = 1
      DO pe = 0, Nsize-1
!
! Send the individual buffers with non-blocking sends
!
        nr = LenInVector( pe + 1 )
        IF ( nr .gt. 0 ) THEN
          ! Tag Iam+2000 pairs with the receiver's pe+2000 below.
          CALL MPI_ISEND( InVector( icnt ), nr,                           &
                          CPP_MPI_REAL4, pe, Iam+2000,                    &
                          InComm, Reqs( pe+1 ), Ierr )
        ELSE
          ! No data for this rank: mark the slot so MPI_WAITALL skips it.
          Reqs( pe+1 ) = MPI_REQUEST_NULL
        ENDIF
        icnt = icnt + nr
      ENDDO

!
! Over all processes
!
      ! Receives are posted only after ALL sends are in flight, in
      ! ascending rank order; blocking receives are then deadlock-free.
      icnt = 1
      DO pe = 0, Nsize - 1
!
! Receive the buffers with MPI_Recv. Now we are blocking. 
!
        nr = LenOutVector(pe + 1)
        IF ( nr .gt. 0 ) THEN
          CALL MPI_RECV( OutVector( icnt ), nr,                          &
                         CPP_MPI_REAL4, pe, pe+2000,                     &
                         InComm, Status, Ierr )
        ENDIF
        icnt = icnt + nr
      ENDDO
      CALL MPI_WAITALL( Nsize, Reqs, Stats, Ierr )

      deallocate (Reqs)
      deallocate (Stats)

      CPP_LEAVE_PROCEDURE( "PAREXCHANGEVECTORREAL4" )

      RETURN
!EOC
      END SUBROUTINE ParExchangeVectorReal4
    3982             : !-----------------------------------------------------------------------
    3983             : 
    3984             : 
    3985             : !-----------------------------------------------------------------------
    3986             : !BOP
    3987             : ! !IROUTINE: ParExchangeVectorInt --- Exchange a sparse packed vector
    3988             : !
    3989             : ! !INTERFACE:  
      SUBROUTINE ParExchangeVectorInt ( InComm, LenInVector, InVector,   &
                                         LenOutVector, OutVector )

! !USES:
      IMPLICIT NONE

! !INPUT PARAMETERS:
      INTEGER, INTENT( IN )   :: InComm            ! Communicator
      INTEGER, INTENT( IN )   :: LenInVector( * )  ! Length on each PE
      INTEGER, INTENT( IN )   :: InVector( * )     ! The input buffer

! !OUTPUT PARAMETERS:
      INTEGER, INTENT( OUT )  :: LenOutVector( * ) ! Length on each PE
      INTEGER, INTENT( OUT )  :: OutVector( * )    ! The output buffer

! !DESCRIPTION:
!
!     This routine exchanges vectors stored in compressed format, i.e.,
!     in so-called compressed sparse row (CSR) format, with other
!     PEs.  In essence it first exchanges the lengths with
!     MPI\_Alltoall, then the exchange of the actual vectors (can be
!     different in size) using MPI\_AlltoallV.  Since the latter is
!     inefficient, it is simulated using MPI\_Isend and MPI\_Recv.
!
!     LenInVector(pe+1) gives the number of integers destined for rank
!     pe; payloads are packed contiguously in InVector in ascending
!     rank order, and the received data comes back in the same layout.
!
! !SYSTEM ROUTINES:
!     MPI_ISEND, MPI_RECV, MPI_WAITALL, MPI_ALLTOALL
!
! !REVISION HISTORY:
!   98.03.17   Sawyer     Creation from F77 version
!   98.03.30   Sawyer     Removed assumed shape arrays due to problems
!   99.01.18   Sawyer     Added barrier for safety
!   99.03.08   Sawyer     USE_SHMEM version for CRAY only; untested
!   99.06.01   Sawyer     USE_SHMEM version revised per comments from Tom
!   00.07.28   Sawyer     Implemented with shared memory arenas
!
!EOP
!-----------------------------------------------------------------------
!BOC
!
! !LOCAL VARIABLES:
      INTEGER :: i, iscnt, ircnt, nr, pe, icnt, Nsize, Iam, Ierr
      INTEGER :: Status(MPI_STATUS_SIZE)
      ! Reqs(pe+1): request handle of the send posted to rank pe.
      ! Stats: flattened (MPI_STATUS_SIZE,Nsize) status array for MPI_WAITALL.
      Integer, allocatable :: Reqs(:), Stats(:)

      CPP_ENTER_PROCEDURE( "PAREXCHANGEVECTORINT" )

      CALL MPI_COMM_SIZE( InComm, Nsize, Ierr )
      CALL MPI_COMM_RANK( InComm, Iam, Ierr )

      allocate (Reqs(Nsize))
      allocate (Stats(Nsize*MPI_STATUS_SIZE))

#if defined( MY_ALLTOALL )
      DO pe = 0, Nsize-1
!
! Send the individual buffers with non-blocking sends
!
        nr = LenInVector( pe + 1 )
! NOTE(review): this branch uses MPI_INTEGER while the sibling Real4/Real8
! routines use CPP_MPI_INTEGER for the same exchange -- confirm the two
! resolve to the same type when MY_ALLTOALL is enabled.
        CALL MPI_ISEND( nr, 1,                                           &
                        MPI_INTEGER, pe, Iam+3000,                       &
                        InComm, Reqs( pe+1 ), Ierr )
      ENDDO
      DO pe = 0, Nsize - 1
!
! Receive the buffers with MPI_Recv. Now we are blocking.
!
        CALL MPI_RECV( nr, 1,                                                 &
                       MPI_INTEGER, pe, pe+3000,                              &
                       InComm, Status, Ierr )
        LenOutVector(pe + 1) = nr
      ENDDO
      CALL MPI_WAITALL( Nsize, Reqs, Stats, Ierr )
#else
      ! Default path: one all-to-all exchanges the per-PE lengths.
      CALL MPI_ALLTOALL( LenInVector, 1, CPP_MPI_INTEGER,                     &
                         LenOutVector, 1, CPP_MPI_INTEGER,                    &
                         InComm, Ierr )
#endif
!
! Over all processes
!
      ! icnt is the 1-based running offset into the packed InVector.
      icnt = 1
      DO pe = 0, Nsize-1
!
! Send the individual buffers with non-blocking sends
!
        nr = LenInVector( pe + 1 )
        IF ( nr .gt. 0 ) THEN
          ! Tag Iam+2000 pairs with the receiver's pe+2000 below.
          CALL MPI_ISEND( InVector( icnt ), nr,                               &
                          CPP_MPI_INTEGER, pe, Iam+2000,                      &
                          InComm, Reqs( pe+1 ), Ierr )
        ELSE
          ! No data for this rank: mark the slot so MPI_WAITALL skips it.
          Reqs( pe+1 ) = MPI_REQUEST_NULL
        ENDIF
        icnt = icnt + nr
      ENDDO

!
! Over all processes
!
      ! Receives are posted only after ALL sends are in flight, in
      ! ascending rank order; blocking receives are then deadlock-free.
      icnt = 1
      DO pe = 0, Nsize - 1
!
! Receive the buffers with MPI_Recv. Now we are blocking. 
!
        nr = LenOutVector(pe + 1)
        IF ( nr .gt. 0 ) THEN
          CALL MPI_RECV( OutVector( icnt ), nr,                               &
                         CPP_MPI_INTEGER, pe, pe+2000,                        &
                         InComm, Status, Ierr )
        ENDIF
        icnt = icnt + nr
      ENDDO
      CALL MPI_WAITALL( Nsize, Reqs, Stats, Ierr )
!
! WS 98.09.22 : This barrier needed to synchronize.  Why?
!
      ! NOTE(review): the sibling Real4/Real8 variants have no such
      ! barrier; the original reason is undocumented -- do not remove
      ! without testing, but worth investigating.
      CALL MPI_BARRIER( InComm, Ierr )

      deallocate (Reqs)
      deallocate (Stats)

      CPP_LEAVE_PROCEDURE( "PAREXCHANGEVECTORINT" )

      RETURN
!EOC
      END SUBROUTINE ParExchangeVectorInt
    4116             : !-----------------------------------------------------------------------
    4117             : 
    4118             : 
    4119             : !-----------------------------------------------------------------------
    4120             : !BOP
    4121             : ! !ROUTINE: ParCollectiveBarrier --- Barrier: Simplest collective op.
    4122             : !
    4123             : ! !INTERFACE:
    4124           0 :       SUBROUTINE ParCollectiveBarrier( InComm )
    4125             : 
    4126             : ! !USES:
    4127             :       IMPLICIT NONE
    4128             : ! !INPUT PARAMETERS:
    4129             :       INTEGER, INTENT( IN ) :: InComm   ! Communicator
    4130             : 
    4131             : ! !DESCRIPTION:
    4132             : !
    4133             : !     This routine performs a barrier only within the communicator InComm
    4134             : !     
    4135             : ! !REVISION HISTORY:
    4136             : !   00.09.10   Sawyer     Creation
    4137             : !
    4138             : !EOP
    4139             : !---------------------------------------------------------------------
    4140             : !BOC
    4141             :       INTEGER Ierror
    4142             : 
    4143           0 :       CALL MPI_Barrier(InComm, Ierror )
    4144             : 
    4145           0 :       RETURN
    4146             : !EOC
    4147             :       END SUBROUTINE ParCollectiveBarrier
    4148             : !-----------------------------------------------------------------------
    4149             : 
    4150             : !-----------------------------------------------------------------------
    4151             : !BOP
    4152             : ! !ROUTINE: ParCollective0D --- Perform global Collective of a scalar
    4153             : !
    4154             : ! !INTERFACE:
    4155           0 :       SUBROUTINE ParCollective0D( InComm, Op, Var )
    4156             : 
    4157             : ! !USES:
    4158             :       IMPLICIT NONE
    4159             : ! !INPUT PARAMETERS:
    4160             :       INTEGER, INTENT( IN ) :: InComm   ! Communicator
    4161             :       INTEGER, INTENT( IN ) :: Op       ! Operation (see header)
    4162             : 
    4163             : ! !INPUT/OUTPUT PARAMETERS:
    4164             :       REAL(CPP_REAL8), INTENT( INOUT ) :: Var  ! partial Var in, Var out
    4165             : 
    4166             : ! !DESCRIPTION:
    4167             : !
    4168             : !     This utility makes a collective operation over all processes in 
    4169             : !     communicator InComm.  
    4170             : !     
    4171             : ! !REVISION HISTORY:
    4172             : !   00.08.07   Sawyer     Creation
    4173             : !
    4174             : !EOP
    4175             : !---------------------------------------------------------------------
    4176             : !BOC
    4177             :       INTEGER Ierror
    4178             :       REAL(CPP_REAL8)    Tmp
    4179             : 
    4180           0 :       IF ( Op .EQ. BCSTOP ) THEN
    4181           0 :         CALL MPI_BCAST( Var, 1, CPP_MPI_REAL8, 0, InComm, Ierror )
    4182             :       ELSE
    4183             :         CALL MPI_ALLREDUCE( Var, Tmp, 1, CPP_MPI_REAL8,                  &
    4184           0 :                             Op, InComm, Ierror )
    4185           0 :         Var = Tmp
    4186             :       ENDIF
    4187             : 
    4188           0 :       RETURN
    4189             : !EOC
    4190             :       END SUBROUTINE ParCollective0D
    4191             : !-----------------------------------------------------------------------
    4192             : 
    4193             : !-----------------------------------------------------------------------
    4194             : !BOP
    4195             : ! !ROUTINE: ParCollective1D --- Perform component-wise global Collective of a vector
    4196             : !
    4197             : ! !INTERFACE:
    4198       37632 :       SUBROUTINE ParCollective1D( InComm, Op, Im, Var )
    4199             : 
    4200             : ! !USES:
    4201             :       IMPLICIT NONE
    4202             : 
    4203             : ! !INPUT PARAMETERS:
    4204             :       INTEGER, INTENT( IN ) :: InComm   ! Communicator
    4205             :       INTEGER, INTENT( IN ) :: Op       ! Operation (see header)
    4206             :       INTEGER, INTENT( IN ) :: Im       ! Size of 1-D array
    4207             : 
    4208             : ! !INPUT/OUTPUT PARAMETERS:
    4209             :       REAL(CPP_REAL8), INTENT( INOUT ) :: Var(Im) ! partial Var in, Var out
    4210             : 
    4211             : ! !DESCRIPTION:
    4212             : !
    4213             : !     This utility makes a collective operation over all processes in 
    4214             : !     communicator InComm.  
    4215             : !     
    4216             : ! !REVISION HISTORY:
    4217             : !   00.08.07   Sawyer     Creation
    4218             : !
    4219             : !EOP
    4220             : !---------------------------------------------------------------------
    4221             : !BOC
    4222             :       INTEGER Ierror
    4223       75264 :       REAL(CPP_REAL8)    Tmp(Im)
    4224             : 
    4225       37632 :       IF ( Op .EQ. BCSTOP ) THEN
    4226           0 :         CALL MPI_BCAST( Var, Im, CPP_MPI_REAL8, 0, InComm, Ierror )
    4227             :       ELSE
    4228             :         CALL MPI_ALLREDUCE( Var, Tmp, Im, CPP_MPI_REAL8,                 &
    4229       37632 :                             Op, InComm, Ierror )
    4230      134400 :         Var = Tmp
    4231             :       ENDIF
    4232             : 
    4233       37632 :       RETURN
    4234             : !EOC
    4235             :       END SUBROUTINE ParCollective1D
    4236             : !-----------------------------------------------------------------------
    4237             : 
    4238             : !-----------------------------------------------------------------------
    4239             : !BOP
    4240             : ! !ROUTINE: ParCollective1DReal4 --- Perform component-wise global Collective of a vector
    4241             : !
    4242             : ! !INTERFACE:
    4243           0 :       SUBROUTINE ParCollective1DReal4( InComm, Op, Im, Var )
    4244             : 
    4245             : ! !USES:
    4246             :       IMPLICIT NONE
    4247             : 
    4248             : ! !INPUT PARAMETERS:
    4249             :       INTEGER, INTENT( IN ) :: InComm   ! Communicator
    4250             :       INTEGER, INTENT( IN ) :: Op       ! Operation (see header)
    4251             :       INTEGER, INTENT( IN ) :: Im       ! Size of 1-D array
    4252             : 
    4253             : ! !INPUT/OUTPUT PARAMETERS:
    4254             :       REAL(CPP_REAL4), INTENT( INOUT ) :: Var(Im) ! partial Var in, Var out
    4255             : 
    4256             : ! !DESCRIPTION:
    4257             : !
    4258             : !     This utility makes a collective operation over all processes in 
    4259             : !     communicator InComm.  
    4260             : !     
    4261             : ! !REVISION HISTORY:
    4262             : !   00.08.07   Sawyer     Creation
    4263             : !
    4264             : !EOP
    4265             : !---------------------------------------------------------------------
    4266             : !BOC
    4267             :       INTEGER Ierror
    4268           0 :       REAL(CPP_REAL4)    Tmp(Im)
    4269             : 
    4270           0 :       IF ( Op .EQ. BCSTOP ) THEN
    4271           0 :         CALL MPI_BCAST( Var, Im, CPP_MPI_REAL4, 0, InComm, Ierror )
    4272             :       ELSE
    4273             :         CALL MPI_ALLREDUCE( Var, Tmp, Im, CPP_MPI_REAL4,                 &
    4274           0 :                             Op, InComm, Ierror )
    4275           0 :         Var = Tmp
    4276             :       ENDIF
    4277             : 
    4278           0 :       RETURN
    4279             : !EOC
    4280             :       END SUBROUTINE ParCollective1DReal4
    4281             : !-----------------------------------------------------------------------
    4282             : 
    4283             : !-----------------------------------------------------------------------
    4284             : !BOP
    4285             : ! !ROUTINE: ParCollective2D --- Perform component-wise collective operation
    4286             : !
    4287             : ! !INTERFACE:
    4288           0 :       SUBROUTINE ParCollective2D( InComm, Op, Im, Jm, Var )
    4289             : 
    4290             : ! !USES:
    4291             :       IMPLICIT NONE
    4292             : 
    4293             : ! !INPUT PARAMETERS:
    4294             :       INTEGER, INTENT( IN ) :: InComm     ! Communicator
    4295             :       INTEGER, INTENT( IN ) :: Op         ! Operation (see header)
    4296             :       INTEGER, INTENT( IN ) :: Im         ! First dimension of 2-D array
    4297             :       INTEGER, INTENT( IN ) :: Jm         ! Second dimension of 2-D array
    4298             : 
    4299             : ! !INPUT/OUTPUT PARAMETERS:
    4300             :       REAL(CPP_REAL8), INTENT( INOUT ) :: Var(Im,Jm) ! partial Var in, Var out
    4301             : 
    4302             : ! !DESCRIPTION:
    4303             : !
    4304             : !     This utility makes a collective operation over all processes in 
    4305             : !     communicator InComm.  
    4306             : !     
    4307             : ! !REVISION HISTORY:
    4308             : !   00.08.07   Sawyer     Creation
    4309             : !
    4310             : !EOP
    4311             : !---------------------------------------------------------------------
    4312             : !BOC
    4313             :       INTEGER Ierror
    4314           0 :       REAL(CPP_REAL8)    Tmp(Im,Jm)
    4315             : 
    4316           0 :       IF ( Op .EQ. BCSTOP ) THEN
    4317           0 :         CALL MPI_BCAST( Var, Im*Jm, CPP_MPI_REAL8, 0, InComm, Ierror )
    4318             :       ELSE
    4319             :         CALL MPI_ALLREDUCE( Var, Tmp, Im*Jm, CPP_MPI_REAL8,              &
    4320           0 :                             Op, InComm, Ierror )
    4321           0 :         Var = Tmp
    4322             :       ENDIF
    4323             : 
    4324           0 :       RETURN
    4325             : !EOC
    4326             :       END SUBROUTINE ParCollective2D
    4327             : !-----------------------------------------------------------------------
    4328             : 
    4329             : !-----------------------------------------------------------------------
    4330             : !BOP
    4331             : ! !ROUTINE: ParCollective2DReal4 --- Perform component-wise collective operation
    4332             : !
    4333             : ! !INTERFACE:
    4334           0 :       SUBROUTINE ParCollective2DReal4( InComm, Op, Im, Jm, Var )
    4335             : 
    4336             : ! !USES:
    4337             :       IMPLICIT NONE
    4338             : 
    4339             : ! !INPUT PARAMETERS:
    4340             :       INTEGER, INTENT( IN ) :: InComm     ! Communicator
    4341             :       INTEGER, INTENT( IN ) :: Op         ! Operation (see header)
    4342             :       INTEGER, INTENT( IN ) :: Im         ! First dimension of 2-D array
    4343             :       INTEGER, INTENT( IN ) :: Jm         ! Second dimension of 2-D array
    4344             : 
    4345             : ! !INPUT/OUTPUT PARAMETERS:
    4346             :       REAL(CPP_REAL4), INTENT( INOUT ) :: Var(Im,Jm) ! partial Var in, Var out
    4347             : 
    4348             : ! !DESCRIPTION:
    4349             : !
    4350             : !     This utility makes a collective operation over all processes in 
    4351             : !     communicator InComm.  
    4352             : !     
    4353             : ! !REVISION HISTORY:
    4354             : !   00.08.07   Sawyer     Creation
    4355             : !
    4356             : !EOP
    4357             : !---------------------------------------------------------------------
    4358             : !BOC
    4359             :       INTEGER Ierror
    4360           0 :       REAL(CPP_REAL4)    Tmp(Im,Jm)
    4361             : 
    4362           0 :       IF ( Op .EQ. BCSTOP ) THEN
    4363           0 :         CALL MPI_BCAST( Var, Im*Jm, CPP_MPI_REAL4, 0, InComm, Ierror )
    4364             :       ELSE
    4365             :         CALL MPI_ALLREDUCE( Var, Tmp, Im*Jm, CPP_MPI_REAL4,              &
    4366           0 :                             Op, InComm, Ierror )
    4367           0 :         Var = Tmp
    4368             :       ENDIF
    4369             : 
    4370           0 :       RETURN
    4371             : !EOC
    4372             :       END SUBROUTINE ParCollective2DReal4
    4373             : !-----------------------------------------------------------------------
    4374             : 
    4375             : !-----------------------------------------------------------------------
    4376             : !BOP
    4377             : ! !ROUTINE: ParCollective3D --- Perform component-wise global Collective of a vector
    4378             : !
    4379             : ! !INTERFACE:
    4380           0 :       SUBROUTINE ParCollective3D( InComm, Op, Im, Jm, Lm, Var )
    4381             : 
    4382             : ! !USES:
    4383             :       IMPLICIT NONE
    4384             : 
    4385             : ! !INPUT PARAMETERS:
    4386             :       INTEGER, INTENT( IN ) :: InComm     ! Communicator
    4387             :       INTEGER, INTENT( IN ) :: Op         ! Operation (see header)
    4388             :       INTEGER, INTENT( IN ) :: Im         ! First dimension of 3-D array
    4389             :       INTEGER, INTENT( IN ) :: Jm         ! Second dimension of 3-D array
    4390             :       INTEGER, INTENT( IN ) :: Lm         ! Third dimension of 3-D array
    4391             : 
    4392             : ! !INPUT/OUTPUT PARAMETERS:
    4393             :       REAL(CPP_REAL8), INTENT( INOUT ):: Var(Im,Jm,LM) ! partial Var in, Var out
    4394             : 
    4395             : ! !DESCRIPTION:
    4396             : !
    4397             : !     This utility makes a collective operation over all processes in 
    4398             : !     communicator InComm.  
    4399             : !     
    4400             : ! !REVISION HISTORY:
    4401             : !   00.08.07   Sawyer     Creation
    4402             : !
    4403             : !EOP
    4404             : !---------------------------------------------------------------------
    4405             : !BOC
    4406             :       INTEGER Ierror
    4407           0 :       REAL(CPP_REAL8) Tmp(Im,Jm,Lm)
    4408             : 
    4409           0 :       IF ( Op .EQ. BCSTOP ) THEN
    4410           0 :         CALL MPI_BCAST( Var, Im*Jm*Lm, CPP_MPI_REAL8, 0, InComm, Ierror )
    4411             :       ELSE
    4412             :         CALL MPI_ALLREDUCE( Var, Tmp, Im*Jm*Lm, CPP_MPI_REAL8,           &
    4413           0 :                             Op, InComm, Ierror )
    4414           0 :         Var = Tmp
    4415             :       ENDIF
    4416             : 
    4417           0 :       RETURN
    4418             : !EOC
    4419             :       END SUBROUTINE ParCollective3D
    4420             : !-----------------------------------------------------------------------
    4421             : 
    4422             : !-----------------------------------------------------------------------
    4423             : !BOP
    4424             : ! !ROUTINE: ParCollective0DInt --- Perform global Collective of a scalar
    4425             : !
    4426             : ! !INTERFACE:
    4427       32256 :       SUBROUTINE ParCollective0DInt( InComm, Op, Var )
    4428             : 
    4429             : ! !USES:
    4430             :       IMPLICIT NONE
    4431             : 
    4432             : ! !INPUT PARAMETERS:
    4433             :       INTEGER, INTENT( IN ) :: InComm   ! Communicator
    4434             :       INTEGER, INTENT( IN ) :: Op       ! Operation (see header)
    4435             : 
    4436             : ! !INPUT/OUTPUT PARAMETERS:
    4437             :       INTEGER, INTENT( INOUT ) :: Var   ! partial Var in, Var out
    4438             : 
    4439             : ! !DESCRIPTION:
    4440             : !
    4441             : !     This utility makes a collective operation over all processes in 
    4442             : !     communicator InComm.  
    4443             : !     
    4444             : ! !REVISION HISTORY:
    4445             : !   00.08.07   Sawyer     Creation
    4446             : !
    4447             : !EOP
    4448             : !---------------------------------------------------------------------
    4449             : !BOC
    4450             :       INTEGER Ierror
    4451             :       INTEGER    Tmp
    4452             : 
    4453       32256 :       IF ( Op .EQ. BCSTOP ) THEN
    4454           0 :         CALL MPI_BCAST( Var, 1, CPP_MPI_INTEGER, 0, InComm, Ierror )
    4455             :       ELSE
    4456       32256 :         CALL MPI_ALLREDUCE( Var,Tmp,1,CPP_MPI_INTEGER,Op,InComm,Ierror )
    4457       32256 :         Var = Tmp
    4458             :       ENDIF
    4459             : 
    4460       32256 :       RETURN
    4461             : !EOC
    4462             :       END SUBROUTINE ParCollective0DInt
    4463             : !-----------------------------------------------------------------------
    4464             : 
    4465             : !-----------------------------------------------------------------------
    4466             : !BOP
    4467             : ! !ROUTINE: ParCollective0DStr --- Perform global Collective of a string
    4468             : !
    4469             : ! !INTERFACE:
    4470           0 :       SUBROUTINE ParCollective0DStr( InComm, Op, Var )
    4471             : 
    4472             : ! !USES:
    4473             :       IMPLICIT NONE
    4474             : 
    4475             : ! !INPUT PARAMETERS:
    4476             :       INTEGER, INTENT( IN ) :: InComm   ! Communicator
    4477             :       INTEGER, INTENT( IN ) :: Op       ! Operation (see header)
    4478             : 
    4479             : ! !INPUT/OUTPUT PARAMETERS:
    4480             :       CHARACTER (LEN=*), INTENT( INOUT ) :: Var ! partial Var in, Var out
    4481             : 
    4482             : ! !DESCRIPTION:
    4483             : !
    4484             : !     This utility makes a collective operation over all processes in
    4485             : !     communicator InComm.
    4486             : !
    4487             : ! !REVISION HISTORY:
    4488             : !   00.08.07   Sawyer     Creation
    4489             : !
    4490             : !EOP
    4491             : !---------------------------------------------------------------------
    4492             : !BOC
    4493             :       INTEGER Ierror, StrLen
    4494             : 
    4495           0 :       StrLen = LEN(Var)
    4496           0 :       IF ( Op .EQ. BCSTOP ) THEN
    4497           0 :         CALL MPI_BCAST( Var, StrLen, MPI_CHARACTER, 0, InComm, Ierror )
    4498             :       ELSE
    4499           0 :         write(iulog,*) "global reduction of string not supported"
    4500             :       ENDIF
    4501             : 
    4502           0 :       RETURN
    4503             : !EOC
    4504             :       END SUBROUTINE ParCollective0DStr
    4505             : !-----------------------------------------------------------------------
    4506             : 
    4507             : !-----------------------------------------------------------------------
    4508             : !BOP
    4509             : ! !ROUTINE: ParCollective1DStr --- Perform global Collective of a string
    4510             : !
    4511             : ! !INTERFACE:
    4512           0 :       SUBROUTINE ParCollective1DStr( InComm, Op, Im, Var )
    4513             : 
    4514             : ! !USES:
    4515             :       IMPLICIT NONE
    4516             : 
    4517             : ! !INPUT PARAMETERS:
    4518             :       INTEGER, INTENT( IN ) :: InComm   ! Communicator
    4519             :       INTEGER, INTENT( IN ) :: Op       ! Operation (see header)
    4520             :       INTEGER, INTENT( IN ) :: Im       ! Size of 1-D array
    4521             : 
    4522             : ! !INPUT/OUTPUT PARAMETERS:
    4523             :       CHARACTER (LEN=*), INTENT( INOUT ) :: Var(:)   ! partial Var in, Var out
    4524             : 
    4525             : ! !DESCRIPTION:
    4526             : !
    4527             : !     This utility makes a collective operation over all processes in 
    4528             : !     communicator InComm.  
    4529             : !     
    4530             : ! !REVISION HISTORY:
    4531             : !   00.08.07   Sawyer     Creation
    4532             : !
    4533             : !EOP
    4534             : !---------------------------------------------------------------------
    4535             : !BOC
    4536             :       INTEGER Ierror, StrLen
    4537             : 
    4538           0 :       StrLen = LEN(Var(1))
    4539           0 :       IF ( Op .EQ. BCSTOP ) THEN
    4540           0 :         CALL MPI_BCAST( Var, Im*StrLen, MPI_CHARACTER, 0, InComm, Ierror )
    4541             :       ELSE
    4542           0 :         write(iulog,*) "global reduction of string not supported"
    4543             :       ENDIF
    4544             : 
    4545           0 :       RETURN
    4546             : !EOC
    4547             :       END SUBROUTINE ParCollective1DStr
    4548             : !-----------------------------------------------------------------------
    4549             : 
    4550             : !-----------------------------------------------------------------------
    4551             : !BOP
    4552             : ! !ROUTINE: ParCollective1DInt --- Perform component-wise global 
    4553             : !                                  collective operations of int vector
    4554             : !
    4555             : ! !INTERFACE:
    4556        6144 :       SUBROUTINE ParCollective1DInt( InComm, Op, Im, Var )
    4557             : 
    4558             : ! !USES:
    4559             :       IMPLICIT NONE
    4560             : 
    4561             : ! !INPUT PARAMETERS:
    4562             :       INTEGER, INTENT( IN ) :: InComm   ! Communicator
    4563             :       INTEGER, INTENT( IN ) :: Op       ! Operation (see header)
    4564             :       INTEGER, INTENT( IN ) :: Im       ! Size of 1-D array
    4565             : 
    4566             : ! !INPUT/OUTPUT PARAMETERS:
    4567             :       INTEGER, INTENT( INOUT ) :: Var(Im) ! partial Var in, Var out
    4568             : 
    4569             : ! !DESCRIPTION:
    4570             : !
    4571             : !     This utility makes a collective operation over all processes in 
    4572             : !     communicator InComm.  
    4573             : !     
    4574             : ! !REVISION HISTORY:
    4575             : !   00.08.07   Sawyer     Creation
    4576             : !
    4577             : !EOP
    4578             : !---------------------------------------------------------------------
    4579             : !BOC
    4580             :       INTEGER Ierror
    4581       12288 :       INTEGER Tmp(Im)
    4582             : 
    4583        6144 :       IF ( Op .EQ. BCSTOP ) THEN
    4584           0 :         CALL MPI_BCAST( Var, Im, CPP_MPI_INTEGER, 0, InComm, Ierror )
    4585             :       ELSE
    4586        6144 :         CALL MPI_ALLREDUCE( Var,Tmp,Im,CPP_MPI_INTEGER,Op,InComm,Ierror )
    4587     4724736 :         Var = Tmp
    4588             :       ENDIF
    4589             : 
    4590        6144 :       RETURN
    4591             : !EOC
    4592             :       END SUBROUTINE ParCollective1DInt
    4593             : !-----------------------------------------------------------------------
    4594             : !-----------------------------------------------------------------------
    4595             : !BOP
    4596             : ! !ROUTINE: ParCollective2DInt --- Perform component-wise collective op.
    4597             : !
    4598             : ! !INTERFACE:
    4599           0 :       SUBROUTINE ParCollective2DInt( InComm, Op, Im, Jm, Var )
    4600             : 
    4601             : ! !USES:
    4602             :       IMPLICIT NONE
    4603             : 
    4604             : ! !INPUT PARAMETERS:
    4605             :       INTEGER, INTENT( IN ) :: InComm     ! Communicator
    4606             :       INTEGER, INTENT( IN ) :: Op         ! Operation (see header)
    4607             :       INTEGER, INTENT( IN ) :: Im         ! First dimension of 2-D array
    4608             :       INTEGER, INTENT( IN ) :: Jm         ! Second dimension of 2-D array
    4609             : 
    4610             : ! !INPUT/OUTPUT PARAMETERS:
    4611             :       INTEGER, INTENT( INOUT ):: Var(Im,Jm) ! partial Var in, Var out
    4612             : 
    4613             : ! !DESCRIPTION:
    4614             : !
    4615             : !     This utility makes a collective operation over all processes in 
    4616             : !     communicator InComm.  
    4617             : !     
    4618             : ! !REVISION HISTORY:
    4619             : !   00.08.07   Sawyer     Creation
    4620             : !
    4621             : !EOP
    4622             : !---------------------------------------------------------------------
    4623             : !BOC
    4624             :       INTEGER Ierror
    4625           0 :       INTEGER Tmp(Im,Jm)
    4626             : 
    4627           0 :       IF ( Op .EQ. BCSTOP ) THEN
    4628           0 :         CALL MPI_BCAST( Var, Im*Jm, CPP_MPI_INTEGER, 0, InComm, Ierror )
    4629             :       ELSE
    4630             :         CALL MPI_ALLREDUCE( Var, Tmp, Im*Jm, CPP_MPI_INTEGER,           &
    4631           0 :                             Op, InComm, Ierror )
    4632           0 :         Var = Tmp
    4633             :       ENDIF
    4634             : 
    4635           0 :       RETURN
    4636             : !EOC
    4637             :       END SUBROUTINE ParCollective2DInt
    4638             : !-----------------------------------------------------------------------
    4639             : # ifdef _SMEMORY
    4640             : !-----------------------------------------------------------------------
    4641             : !BOP
    4642             : ! !IROUTINE: ParExchangeLength --- Exchange a sparse packed vector
    4643             : !
    4644             : ! !INTERFACE:  
    4645       35328 :       SUBROUTINE ParExchangeLength ( InComm, LenInVector, LenOutVector)
    4646             : 
    4647             : ! !USES:
    4648             :       IMPLICIT NONE
    4649             : 
    4650             : ! !INPUT PARAMETERS:
    4651             :       INTEGER, INTENT( IN )   :: InComm            ! Communicator
    4652             :       INTEGER, INTENT( IN )   :: LenInVector( * )  ! Length on each PE
    4653             : 
    4654             : ! !OUTPUT PARAMETERS:
    4655             :       INTEGER, INTENT( OUT )  :: LenOutVector( * ) ! Length on each PE
    4656             : 
    4657             : ! !DESCRIPTION:
    4658             : !
    4659             : !     This routine exchanges vectors stored in compressed format, i.e.,
    4660             : !     in so-called compressed sparse row (CSR) format, with other
    4661             : !     PEs.  In essence it first exchanges the lengths with
    4662             : !     MPI\_Alltoall, then the exchange of the actual vectors (can be
    4663             : !     different in size) using MPI\_AlltoallV.  Since the latter is
    4664             : !     inefficient, it is simulated using MPI\_Isend and MPI\_Recv.
    4665             : !
    4666             : ! !SYSTEM ROUTINES:
    4667             : !     MPI_ISEND, MPI_RECV, MPI_WAITALL, MPI_ALLTOALL
    4668             : !
    4669             : ! !REVISION HISTORY:
    4670             : !   98.03.17   Sawyer     Creation from F77 version
    4671             : !   98.03.30   Sawyer     Removed assumed shape arrays due to problems
    4672             : !   99.01.18   Sawyer     Added barrier for safety
    4673             : !   99.03.08   Sawyer     USE_SHMEM version for CRAY only; untested
    4674             : !   99.06.01   Sawyer     USE_SHMEM version revised per comments from Tom
    4675             : !   00.07.28   Sawyer     Implemented with shared memory arenas
    4676             : !
    4677             : !EOP
    4678             : !-----------------------------------------------------------------------
    4679             : !BOC
    4680             : !
    4681             : ! !LOCAL VARIABLES:
    4682             :       INTEGER :: i, iscnt, ircnt, nr, pe, icnt, Nsize, Iam, Ierr
    4683             :       INTEGER :: Status(MPI_STATUS_SIZE)
    4684       35328 :       Integer, allocatable :: Reqs(:), Stats(:)
    4685             : 
    4686             :       CPP_ENTER_PROCEDURE( "PAREXCHANGELENGTH" )
    4687             : 
    4688       35328 :       CALL MPI_COMM_SIZE( InComm, Nsize, Ierr )
    4689       35328 :       CALL MPI_COMM_RANK( InComm, Iam, Ierr )
    4690             : 
    4691      105984 :       allocate (Reqs(Nsize))
    4692      105984 :       allocate (Stats(Nsize*MPI_STATUS_SIZE))
    4693             : 
    4694             : #if defined( MY_ALLTOALL )
    4695             :       DO pe = 0, Nsize-1
    4696             : !
    4697             : ! Send the individual buffers with non-blocking sends
    4698             : !
    4699             :         nr = LenInVector( pe + 1 )
    4700             :         CALL MPI_ISEND( nr, 1,                                           &
    4701             :                         MPI_INTEGER, pe, Iam+3000,                       &
    4702             :                         InComm, Reqs( pe+1 ), Ierr )
    4703             :       ENDDO
    4704             :       DO pe = 0, Nsize - 1
    4705             : !
    4706             : ! Receive the buffers with MPI_Recv. Now we are blocking.
    4707             : !
    4708             :         CALL MPI_RECV( nr, 1,                                                 &
    4709             :                        MPI_INTEGER, pe, pe+3000,                              &
    4710             :                        InComm, Status, Ierr )
    4711             :         LenOutVector(pe + 1) = nr
    4712             :       ENDDO
    4713             :       CALL MPI_WAITALL( Nsize, Reqs, Stats, Ierr )
    4714             : 
    4715             :       deallocate (Reqs)
    4716             :       deallocate (Stats)
    4717             : 
    4718             : #else
    4719             :       CALL MPI_ALLTOALL( LenInVector, 1, CPP_MPI_INTEGER,                     &
    4720             :                          LenOutVector, 1, CPP_MPI_INTEGER,                    &
    4721       35328 :                          InComm, Ierr )
    4722             : #endif
    4723       35328 :       CALL MPI_BARRIER( InComm, Ierr )
    4724             : 
    4725             : 
    4726             :       CPP_LEAVE_PROCEDURE( "PAREXCHANGELENGTH" )
    4727             : 
    4728       35328 :       RETURN
    4729             : !EOC
    4730       35328 :       END SUBROUTINE ParExchangeLength
    4731             : !-----------------------------------------------------------------------
    4732             : !-----------------------------------------------------------------------
    4733             : !BOP
    4734             : ! !IROUTINE:   ParCalcInfoDecompToGhost --- calculates info about the pattern
    4735             : !
    4736             : ! !INTERFACE:
    4737        7680 :       subroutine ParCalcInfoDecompToGhost(InComm, DA,GB,Info)
    4738             : !
    4739             : ! !USES:
    4740             :       USE decompmodule, ONLY : DecompType,DecompInfo,DecompGlobalToLocal
    4741             :       USE ghostmodule, ONLY : GhostType,GhostInfo
    4742             :       IMPLICIT NONE
    4743             : 
    4744             : ! !INPUT PARAMETERS:
    4745             :       integer, intent(in)           :: InComm ! communicator
    4746             :       type(DecompType), intent(in)  :: DA   ! Source Decomp Desc
    4747             :       type(GhostType) , intent(in)  :: GB   ! Destination Ghost Desc
    4748             : 
    4749             : ! !OUTPUT PARAMETERS:
    4750             :       type (ParInfoType), intent(out) :: Info  ! Info structure
    4751             : !
    4752             : ! !DESCRIPTION:
    4753             : !     This routine calulcates the information about a communication 
    4754             : !     pattern that transforms from one decomposition to another, 
    4755             : !     i.e., a so-called "transpose".  This is a copy of an algorithm 
    4756             : !     from the ParPatternDecompToGhost subroutine.
    4757             : !
    4758             : ! !SYSTEM ROUTINES:
    4759             : !    MPI_COMM_SIZE, MPI_COMM_RANK, MPI_ALLREDUCE
    4760             : !
    4761             : ! !REVISION HISTORY:
    4762             : !   07.09.04   Dennis     Creation based on algorithm in ParPatternDecompToGhost
    4763             : !
    4764             : !EOP
    4765             : !-----------------------------------------------------------------------
    4766             : !BOC
    4767             : ! !LOCAL VARIABLES:
    4768             :       integer :: nTags,oldpe,oldlocal,sTag,eTag,nCount
    4769             :       integer :: j,pe,local,tag,ierr,iam,npes
    4770             :       integer :: npesA,npesB,tmpA,tmp1B,tmp2B,tmp3B
    4771        7680 :       integer, allocatable :: sCount(:),rCount(:)
    4772             :   
    4773        7680 :       call DecompInfo(DA,npesA,tmpA)
    4774        7680 :       call GhostInfo(GB,npesB,tmp1B,tmp2B,tmp3B)
    4775             : 
    4776        7680 :       call MPI_COMM_SIZE(InComm,npes,ierr)
    4777        7680 :       call MPI_COMM_RANK(InComm,iam,ierr)
    4778             : 
    4779       30720 :       allocate(sCount(npes),rCount(npes))
    4780     5905920 :       sCount=0
    4781     5905920 :       rCount=0
    4782        7680 :       if(iam .lt. npesB)  then 
    4783             : ! Parse through all the tags in the local segment
    4784        7680 :         nTags = SIZE(GB%Local%Head(iam+1)%StartTags)
    4785      361984 :         do j=1,nTags
    4786      354304 :           oldpe = -1
    4787      354304 :           oldlocal = 0 
    4788      354304 :           sTag = GB%Local%Head(iam+1)%StartTags(j)
    4789      354304 :           eTag = GB%Local%Head(iam+1)%EndTags(j)
    4790    41944576 :           do tag = sTag,eTag
    4791    41936896 :             if(tag > 0) then 
    4792             : !
    4793             : ! Determine the index and PE of this entry on A. This might be inlined later
    4794             : !
    4795    41250816 :               call DecompGlobalToLocal(DA,tag,Local,Pe)
    4796             : !
    4797             : ! If ipe-1 is my id, then this is an entry ipe will receive from Pe
    4798             : !
    4799    41250816 :               if( pe /= oldpe .or. local /= oldlocal+1) then 
    4800     1718784 :                 sCount(pe+1) = sCount(pe+1) + 1
    4801             :               endif
    4802    41250816 :               oldpe = pe  ! Update PE
    4803    41250816 :               oldlocal = local  ! Update local index
    4804             :             endif
    4805             :           enddo
    4806             :         enddo
    4807             :       endif
    4808             :       
    4809             : ! Calculate the length of receive segments
    4810        7680 :       call ParExchangeLength(InComm,sCount,rCount)
    4811             : !  Record some information 
    4812     5905920 :       Info%numSendSeg   = SUM(sCount)
    4813     5905920 :       InFo%numSendNeigh = COUNT(sCount > 0) 
    4814             : 
    4815     5905920 :       Info%numRecvSeg   = SUM(rCount)
    4816     5905920 :       InFo%numRecvNeigh = COUNT(rCount > 0) 
    4817        7680 :       nCount=MAX(Info%numSendSeg,Info%numRecvSeg)
    4818        7680 :       call MPI_ALLREDUCE(nCount,Info%maxNumSeg,1,INT4,MPI_MAX,InComm,ierr)
    4819             : 
    4820        7680 :       deallocate(sCount,rCount)
    4821             : 
    4822             :       CPP_LEAVE_PROCEDURE( "PARCALCLENGTHDECOMPTOGHOST" )
    4823        7680 :       RETURN
    4824             : !EOC
    4825        7680 :       end subroutine ParCalcInfoDecompToGhost
    4826             : !-----------------------------------------------------------------------
    4827             : !-----------------------------------------------------------------------
    4828             : !BOP  
    4829             : ! !IROUTINE:   ParCalcInfoDecompToDecomp --- calculates info about the pattern
    4830             : !
    4831             : ! !INTERFACE:
      subroutine ParCalcInfoDecompToDecomp(InComm, DA,DB,Info)
!     
! !USES:
      USE decompmodule, ONLY : DecompType,DecompInfo,DecompGlobalToLocal
      IMPLICIT NONE

! !INPUT PARAMETERS:
      integer, intent(in)           :: InComm  ! communicator
      type(DecompType), intent(in)  :: DA      ! Source Decomp Desc
      type(DecompType), intent(in)  :: DB      ! Destination Decomp Desc

! !OUTPUT PARAMETERS:
      type (ParInfoType), intent(out) :: Info  ! Info structure
!           
! !DESCRIPTION:
!     This routine calculates the information about a communication
!     pattern that transforms from one decomposition to another,
!     i.e., a so-called "transpose".  This is a copy of an algorithm
!     from the ParPatternDecompToDecomp subroutine.  It counts, for
!     this PE, the number of contiguous send segments per peer, then
!     exchanges those counts to obtain the receive-segment counts.
!
! !SYSTEM ROUTINES:
!    MPI_COMM_SIZE, MPI_COMM_RANK, MPI_ALLREDUCE
!
! !REVISION HISTORY:
!   07.09.04   Dennis     Creation based on algorithm in ParPatternDecompToDecomp
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
      integer :: nCount,npes,iam,ierr
      integer :: nTags,oldpe,oldlocal,sTag,eTag   ! NOTE(review): oldlocal is unused here
      integer :: j,pe,local,tag,tmpA,tmpB,npesA,npesB
      integer, allocatable :: sCount(:),rCount(:)

      call DecompInfo(DA,npesA,tmpA)
      call DecompInfo(DB,npesB,tmpB)

      call MPI_COMM_SIZE(InComm,npes,ierr)
      call MPI_COMM_RANK(InComm,iam,ierr)
  
      ! Per-peer segment counts: sCount(p+1) = segments this PE sends to p
      allocate(sCount(npes),rCount(npes))
      sCount=0
      rCount=0
      if(iam .lt. npesB)  then
! Parse through all the tags in the local segment
        nTags = SIZE(DB%Head(iam+1)%StartTags)
        do j=1,nTags
          oldpe = -1
          sTag = DB%Head(iam+1)%StartTags(j)
          eTag = DB%Head(iam+1)%EndTags(j)
          do tag = sTag,eTag
!
! Determine the index and PE of this entry on A. This might be inlined later
!
            call DecompGlobalToLocal(DA,tag,Local,Pe)
!
! A new segment starts whenever the owning PE changes (unlike the
! ghost variant, local-index contiguity is not checked here)
!
            if( pe /= oldpe ) then 
              oldpe = pe
              sCount(pe+1) = sCount(pe+1) + 1
            endif
          enddo
        enddo
      endif
! Calculate the length of receive segments    
      call ParExchangeLength(InComm,sCount,rCount)      
!  Record some information
      Info%numSendSeg   = SUM(sCount)
      InFo%numSendNeigh = COUNT(sCount > 0)

      Info%numRecvSeg   = SUM(rCount)
      InFo%numRecvNeigh = COUNT(rCount > 0)
      ! Global maximum segment count across all PEs (send or receive)
      nCount=MAX(Info%numSendSeg,Info%numRecvSeg)
      call MPI_ALLREDUCE(nCount,Info%maxNumSeg,1,INT4,MPI_MAX,InComm,ierr)
  
      deallocate(sCount,rCount)

      CPP_LEAVE_PROCEDURE( "PARCALCINFODECOMPTODECOMP" )
      RETURN
!EOC
      end subroutine ParCalcInfoDecompToDecomp
    4915             : !--------------------------------------------------------------------------------------------------------
    4916             : !-----------------------------------------------------------------------
    4917             : !BOP
    4918             : ! !IROUTINE:   ParCalcInfoGhostToDecomp --- calculates info about the pattern
    4919             : !
    4920             : ! !INTERFACE:
    4921        6144 :       subroutine ParCalcInfoGhostToDecomp(InComm, GA,DB,Info)
    4922             : !
    4923             : ! !USES:
    4924             :       USE decompmodule, ONLY : DecompType,DecompInfo,DecompGlobalToLocal
    4925             :       USE ghostmodule, ONLY : GhostType,GhostInfo
    4926             :       IMPLICIT NONE
    4927             : 
    4928             : ! !INPUT PARAMETERS:
    4929             :       integer, intent(in)           :: InComm  ! communicator
    4930             :       type(GhostType), intent(in)   :: GA      ! Source Ghost Desc
    4931             :       type(DecompType), intent(in)  :: DB      ! Destination Decomp Desc
    4932             : 
    4933             : ! !OUTPUT PARAMETERS:
    4934             :       type (ParInfoType), intent(out) :: Info  ! Info structure
    4935             : !
    4936             : ! !DESCRIPTION:
    4937             : !     This routine calulcates the information about a communication
    4938             : !     pattern that transforms from one decomposition to another,
    4939             : !     i.e., a so-called "transpose".  This is a copy of an algorithm
    4940             : !     from the ParPatternGhostToDecomp subroutine.
    4941             : !
    4942             : ! !SYSTEM ROUTINES:
    4943             : !    MPI_COMM_SIZE, MPI_COMM_RANK, MPI_ALLREDUCE
    4944             : !
    4945             : ! !REVISION HISTORY:
    4946             : !   07.09.04   Dennis     Creation based on algorithm in ParPatternGhostToDecomp
    4947             : !
    4948             : !EOP
    4949             : !-----------------------------------------------------------------------
    4950             : !BOC
    4951             : ! !LOCAL VARIABLES:
    4952             :       integer :: nTags,oldpe,oldlocal,sTag,eTag
    4953             :       integer :: npes, nCount,iam,ierr
    4954             :       integer :: j,pe,local,tag,npesA,npesB,tmpB,tmp1A,tmp2A,tmp3A
    4955        6144 :       integer, allocatable :: sCount(:),rCount(:)
    4956             : 
    4957        6144 :       call GhostInfo(GA,npesA,tmp1A,tmp2A,tmp3A)
    4958        6144 :       call DecompInfo(DB,npesB,tmpB)
    4959             : 
    4960        6144 :       call MPI_COMM_SIZE(InComm,npes,ierr)
    4961        6144 :       call MPI_COMM_RANK(InComm,iam,ierr)
    4962             :   
    4963       24576 :       allocate(sCount(npes),rCount(npes))
    4964     4724736 :       sCount=0
    4965     4724736 :       rCount=0
    4966        6144 :       if(iam .lt. npesB) then 
    4967             : ! Parse through all the tags in the local segment
    4968        6144 :         nTags = SIZE(DB%Head(iam+1)%StartTags)
    4969      600576 :         do j=1,nTags
    4970      594432 :           oldpe = -1
    4971      594432 :           oldlocal = 0
    4972      594432 :           sTag = DB%Head(iam+1)%StartTags(j)
    4973      594432 :           eTag = DB%Head(iam+1)%EndTags(j)
    4974    14866944 :           do tag = sTag,eTag
    4975             : !
    4976             : ! Determine the index and PE of this entry on A. This might be inlined later
    4977             : !
    4978    14266368 :             call DecompGlobalToLocal(GA%Decomp,tag,Local,Pe)
    4979             : !
    4980             : ! If ipe-1 is my id, then this is an entry ipe will receive from Pe
    4981             : !
    4982    14266368 :             if ((pe /= -1) .and. ( pe /= oldpe .or. local /= OldLocal+1 )) then 
    4983      594432 :               sCount(pe+1) = sCount(pe+1) + 1
    4984             :             endif
    4985    14266368 :             oldpe = pe
    4986    29127168 :             oldlocal = local
    4987             :           enddo
    4988             :         enddo
    4989             :       endif
    4990             : ! Calculate the lenght of recieve segments
    4991        6144 :       call ParExchangeLength(InComm,sCount,rCount)
    4992             : !  Record some information
    4993     4724736 :       Info%numSendSeg   = SUM(sCount)
    4994     4724736 :       InFo%numSendNeigh = COUNT(sCount > 0)
    4995             : 
    4996     4724736 :       Info%numRecvSeg   = SUM(rCount)
    4997     4724736 :       InFo%numRecvNeigh = COUNT(rCount > 0)
    4998        6144 :       nCount=MAX(Info%numSendSeg,Info%numRecvSeg)
    4999        6144 :       call MPI_ALLREDUCE(nCount,Info%maxNumSeg,1,INT4,MPI_MAX,InComm,ierr)
    5000             : 
    5001        6144 :       deallocate(sCount,rCount)
    5002             : 
    5003             :       CPP_LEAVE_PROCEDURE( "PARCALCLENGTHGHOSTTODECOMP" )
    5004        6144 :       RETURN
    5005             : !EOC
    5006        6144 :       end subroutine ParCalcInfoGhostToDecomp
    5007             : !-----------------------------------------------------------------------
    5008             : !-----------------------------------------------------------------------
    5009             : !BOP
    5010             : ! !IROUTINE:   ParCalcInfoGhostToGhost --- calculates info about the pattern
    5011             : !
    5012             : ! !INTERFACE:
    5013           0 :       subroutine ParCalcInfoGhostToGhost(InComm, GA,GB,Info)
    5014             : !
    5015             : ! !USES:
    5016             :       USE decompmodule, ONLY : DecompGlobalToLocal
    5017             :       USE ghostmodule, ONLY  : GhostType,GhostInfo
    5018             :       IMPLICIT NONE
    5019             : 
    5020             : ! !INPUT PARAMETERS:
    5021             :       integer, intent(in)           :: InComm ! communicator
    5022             :       type(GhostType), intent(in)   :: GA     ! Source Ghost Desc
    5023             :       type(GhostType), intent(in)   :: GB     ! Destination Ghost Desc
    5024             : 
    5025             : ! !OUTPUT PARAMETERS:
    5026             :       type (ParInfoType), intent(out) :: Info  ! Info structure
    5027             : !
    5028             : ! !DESCRIPTION:
    5029             : !     This routine calulcates the information about a communication
    5030             : !     pattern that transforms from one decomposition to another,
    5031             : !     i.e., a so-called "transpose".  This is a copy of an algorithm
    5032             : !     from the ParPatternGhostToGhost subroutine.
    5033             : !
    5034             : ! !SYSTEM ROUTINES:
    5035             : !    MPI_COMM_SIZE, MPI_COMM_RANK, MPI_ALLREDUCE
    5036             : !
    5037             : ! !REVISION HISTORY:
    5038             : !   07.09.04   Dennis     Creation based on algorithm in ParPatternGhostToGhost
    5039             : !
    5040             : !EOP
    5041             : !-----------------------------------------------------------------------
    5042             : !BOC
    5043             : ! !LOCAL VARIABLES:
    5044             :       integer :: nTags,oldpe,oldlocal,sTag,eTag,ierr,nCount
    5045             :       integer :: j,pe,local,tag,npes,iam,npesA,npesB
    5046             :       integer :: tmp1A,tmp2A,tmp3A,tmp1B,tmp2B,tmp3B
    5047           0 :       integer, allocatable :: sCount(:),rCount(:)
    5048             : 
    5049           0 :       call GhostInfo(GA,npesA,tmp1A,tmp2A,tmp3A)
    5050           0 :       call GhostInfo(GB,npesB,tmp1B,tmp2B,tmp3B)
    5051             : 
    5052           0 :       call MPI_COMM_SIZE(InComm,npes,ierr)
    5053           0 :       call MPI_COMM_RANK(InComm,iam,ierr)
    5054             :   
    5055           0 :       allocate(sCount(npes),rCount(npes))
    5056           0 :       sCount=0
    5057           0 :       rCount=0 
    5058           0 :       if(iam .lt. npesB) then 
    5059             : ! Parse through all the tags in the local segment
    5060           0 :         nTags = SIZE(GB%Local%Head(iam+1)%StartTags)
    5061           0 :         do j=1,nTags
    5062           0 :           oldpe = -1
    5063           0 :           oldlocal = 0
    5064           0 :           sTag = GB%Local%Head(iam+1)%StartTags(j)
    5065           0 :           eTag = GB%Local%Head(iam+1)%EndTags(j)
    5066           0 :           do tag = sTag,eTag
    5067           0 :             if (Tag > 0 ) THEN 
    5068             : !
    5069             : ! Determine the index and PE of this entry on A. This might be inlined later
    5070             : !
    5071           0 :               call DecompGlobalToLocal(GA%Decomp,tag,Local,Pe)
    5072             : !
    5073             : ! If ipe-1 is my id, then this is an entry ipe will receive from Pe
    5074             : !
    5075           0 :               if( pe /= oldpe .or. local /= OldLocal+1 ) then 
    5076           0 :                 sCount(pe+1)=sCount(pe+1)+1
    5077             :               endif
    5078           0 :               oldpe = pe
    5079           0 :               oldlocal = local
    5080             :             endif
    5081             :           enddo
    5082             :         enddo
    5083             :       endif
    5084             : 
    5085             : ! Calculate the length of receive segments
    5086           0 :       call ParExchangeLength(InComm,sCount,rCount)
    5087             : !  Record some information
    5088           0 :       Info%numSendSeg   = SUM(sCount)
    5089           0 :       InFo%numSendNeigh = COUNT(sCount > 0)
    5090             : 
    5091           0 :       Info%numRecvSeg   = SUM(rCount)
    5092           0 :       InFo%numRecvNeigh = COUNT(rCount > 0)
    5093           0 :       nCount=MAX(Info%numSendSeg,Info%numRecvSeg)
    5094           0 :       call MPI_ALLREDUCE(nCount,Info%maxNumSeg,1,INT4,MPI_MAX,InComm,ierr)
    5095             : 
    5096           0 :       deallocate(sCount,rCount)
    5097             : 
    5098             :       CPP_LEAVE_PROCEDURE( "PARCALCINFOGHOSTTOGHOST" )
    5099           0 :       RETURN
    5100             : !EOC
    5101           0 :       end subroutine ParCalcInfoGhostToGhost
    5102             : # endif
    5103             : #endif
    5104           0 :       END MODULE parutilitiesmodule

Generated by: LCOV version 1.14