Line data Source code
1 : module spmd_dyn
2 :
3 : !-----------------------------------------------------------------------
4 : ! Subroutines to initialize SPMD implementation of FV
5 : !
6 : ! !REVISION HISTORY:
7 : ! 00.09.30 Sawyer Alterations for LR SPMD mode
8 : ! 01.05.09 Mirin 2-D yz decomposition
9 : ! 01.06.27 Mirin Secondary 2-D xy decomposition
10 : ! 01.12.20 Sawyer Changed index order of Q3 decomposition
11 : ! 03.05.07 Sawyer Removed unneeded decompositions
12 : ! 06.03.01 Sawyer Removed tracertrans-related variables
13 : !-----------------------------------------------------------------------
14 :
15 : use spmd_utils, only: iam, masterproc, npes, mpicom
16 :
17 : use pmgrid, only: plat, plon, plev, spmd_on
18 : use constituents, only: pcnst
19 :
20 : use dynamics_vars, only: t_fvdycore_grid
21 : use dyn_internal_state, only: get_dyn_state_grid
22 :
23 : use cam_abortutils, only: endrun
24 : use cam_logfile, only: iulog
25 :
26 : implicit none
27 : private
28 : save
29 :
30 : public :: &
31 : spmd_readnl, &
32 : spmdinit_dyn, &
33 : compute_gsfactors, &
34 : spmdbuf
35 :
36 : public :: &
37 : local_dp_map, &
38 : block_buf_nrecs, &
39 : chunk_buf_nrecs, &
40 : proc, &
41 : lonrangexy, &
42 : latrangexy
43 :
44 : ! local_dp_map, block_buf_nrecs, chunk_buf_nrecs belong somewhere else. They are just
45 : ! stored here without being set or used
46 : logical :: local_dp_map=.false. ! flag indicates that mapping between dynamics
47 : ! and physics decompositions does not require
48 : ! interprocess communication
49 : integer :: block_buf_nrecs ! number of local grid points (lon,lat,lev)
50 : ! in dynamics decomposition (including level 0)
51 : integer :: chunk_buf_nrecs ! number of local grid points (lon,lat,lev)
52 : ! in physics decomposition (including level 0)
53 :
54 : ! used by dyn_grid::get_block_owner_d
55 : integer :: proc(plat) ! processor id associated with a given lat.
56 :
57 : ! used by dyn_grid::get_gcol_block_d, get_block_gcol_cnt_d, get_block_gcol_d
58 : integer, allocatable :: lonrangexy(:,:) ! global xy-longitude subdomain index
59 : integer, allocatable :: latrangexy(:,:) ! global xy-latitude subdomain index
60 :
61 : integer :: force_2d = 0 !option to force transpose computation for 1D decomp.
62 : integer :: geopkblocks = 1 !number of stages to use in Z-serial non-transpose
63 : ! geopotential method (routine geopk_d)
64 : ! with 2D decomp.
65 : logical :: geopkdist = .false. !use a distributed method for geopotential calculation
66 : ! with 2D decomp.
67 : logical :: geopk16byte = .false. !use Z-parallel distributed method for geopotential
68 : ! calculation with 2D decomp.; otherwise, use Z-serial
69 : ! pipeline algorithm
70 : integer :: geopktrans = 0 !geopotential method: 0 transpose, 1 Z-parallel non-transpose, 2 Z-serial non-transpose
71 : integer :: npr_yz(4) !yz and xy decompositions: (npr_y, npr_z, nprxy_x, nprxy_y)
72 : integer :: modcomm_transpose = 0 !mod_comm transpose method
73 : ! 0 for temporary contiguous buffers
74 : ! 1 for mpi derived types
75 : integer :: modcomm_geopk = 0 !mod_comm geopk method
76 : ! 0 for temporary contiguous buffers
77 : ! 1 for mpi derived types
78 : integer :: modcomm_gatscat = 0 !mod_comm gather/scatter method
79 : ! 0 for temporary contiguous buffers
80 : ! 1 for mpi derived types
81 : integer :: modc_sw_dynrun = 0 !mod_comm irregular underlying communication method for dyn_run/misc
82 : ! 0 for original mp_sendirr/mp_recvirr
83 : ! 1 for mp_swapirr and point-to-point communications
84 : ! 2 for mp_swapirr and all-to-all communications
85 : logical :: modc_hs_dynrun = .true. !mod_comm irreg comm handshaking for dyn_run/misc
86 : logical :: modc_send_dynrun = .true. ! true for mod_comm irregular communication blocking send for
87 : ! dyn_run/misc, false for nonblocking send
88 : integer :: modc_mxreq_dynrun = -1 !maximum number of nonblocking communication requests to allow
89 : ! when using mp_swapirr and point-to-point communications for
90 : ! dyn_run/misc
91 : ! < 0 implies no limits
92 : integer :: modc_sw_cdcore = 0 !mod_comm irregular underlying communication method for cd_core/geopk
93 : ! 0 for original mp_sendirr/mp_recvirr
94 : ! 1 for mp_swapirr and point-to-point communications
95 : ! 2 for mp_swapirr and all-to-all communications
96 : logical :: modc_hs_cdcore = .true. ! true for mod_comm irregular communication handshaking for cd_core/geopk
97 : logical :: modc_send_cdcore = .true. ! true for geopk_d or mod_comm irregular communication blocking send for
98 : ! cd_core/geopk, false for nonblocking send
99 : integer :: modc_mxreq_cdcore = -1 ! maximum number of nonblocking communication requests to allow
100 : ! when using mp_swapirr and point-to-point communications for
101 : ! cd_core/geopk
102 : ! < 0 implies no limits
103 : integer :: modc_sw_gather = 1 ! mod_comm irregular underlying communication method for gather
104 : ! 0 for original mp_sendirr/mp_recvirr
105 : ! 1 for mp_swapirr and point-to-point communications
106 : ! 2 for mp_swapirr and all-to-all communications
107 : logical :: modc_hs_gather = .true. ! true for mod_comm irregular communication handshaking for gather
108 : logical :: modc_send_gather = .true. ! true for mod_comm irregular communication blocking send for
109 : ! gather, false for nonblocking send
110 : integer :: modc_mxreq_gather = 64 ! maximum number of nonblocking communication requests to allow
111 : ! when using mp_swapirr and point-to-point communications for
112 : ! gather
113 : ! < 0 implies no limits
114 : integer :: modc_sw_scatter = 0 ! mod_comm irregular underlying communication method for scatter
115 : ! 0 for original mp_sendirr/mp_recvirr
116 : ! 1 for mp_swapirr and point-to-point communications
117 : ! 2 for mp_swapirr and all-to-all communications
118 : logical :: modc_hs_scatter = .false. ! true for mod_comm irregular communication handshaking for scatter
119 : logical :: modc_send_scatter = .true. ! true for mod_comm irregular communication blocking send for
120 : ! scatter, false for nonblocking send
121 : integer :: modc_mxreq_scatter = -1 ! maximum number of nonblocking communication requests to allow
122 : ! when using mp_swapirr and point-to-point communications for
123 : ! scatter
124 : ! < 0 implies no limits
125 : integer :: modc_sw_tracer = 0 ! mod_comm irregular underlying communication method for multiple tracers
126 : ! 0 for original mp_sendirr/mp_recvirr
127 : ! 1 for mp_swapirr and point-to-point communications
128 : ! 2 for mp_swapirr and all-to-all communications
129 : logical :: modc_hs_tracer = .true. ! true for mod_comm irregular communication handshaking for multiple tracers
130 : logical :: modc_send_tracer = .true. ! true for mod_comm irregular communication blocking send for
131 : ! multiple tracers, false for nonblocking send
132 : integer :: modc_mxreq_tracer = -1 ! maximum number of nonblocking communication requests to allow
133 : ! when using mp_swapirr and point-to-point communications for
134 : ! multiple tracers
135 : ! < 0 implies no limits
136 : integer :: modc_onetwo = 2 !one or two simultaneous mod_comm irregular communications
137 : ! (excl. tracers)
138 : integer :: modc_tracers = 3 ! max number of tracers for simultaneous mod_comm irregular communications
139 : ! 0 for original mp_sendirr/mp_recvirr communications
140 : ! positive for special tracer routines
141 :
142 : integer :: fv_ct_overlap = 0 ! nonzero for overlap of cd_core and trac2d, 0 otherwise
143 : integer :: fv_trac_decomp = 1 ! size of tracer domain decomposition for trac2d
144 :
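: ! Example (editor's sketch; values are hypothetical): all of the options
: ! above are read from the spmd_fv_inparm namelist group (see spmd_readnl
: ! below), e.g. in the atm_in namelist file:
: !
: !   &spmd_fv_inparm
: !     npr_yz = 16, 4, 4, 16
: !     geopktrans = 1
: !     modc_sw_cdcore = 1
: !     modc_mxreq_gather = 128
: !   /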
145 : ! these vars are used in determining beglat and endlat, and in compute_gsfactors
146 : integer, allocatable :: nlat_p(:) ! number of latitudes per subdomain in YZ decomp
147 : integer, allocatable :: cut(:,:) ! latitude partition for MPI tasks in YZ decomp
148 :
149 : integer :: npr_y ! number of MPI tasks in latitude direction (YZ decomp)
150 : integer :: npr_z ! number of MPI tasks in vertical direction (YZ decomp)
151 : integer :: nprxy_x ! number of MPI tasks in longitude direction (XY decomp)
152 : integer :: nprxy_y ! number of MPI tasks in latitude direction (XY decomp)
153 : integer :: npes_yz ! number of tasks in the YZ decomp (npr_y*npr_z)
154 : integer :: npes_xy ! number of tasks in the XY decomp (nprxy_x*nprxy_y)
155 : integer :: myid_y ! this task's latitude index in the YZ decomp
156 : integer :: myid_z ! this task's vertical index in the YZ decomp
157 : integer :: myidxy_x ! this task's longitude index in the XY decomp
158 : integer :: myidxy_y ! this task's latitude index in the XY decomp
159 :
160 : integer :: numlats ! number of latitudes assigned to this task (YZ decomp)
161 :
162 : !========================================================================================
163 : contains
164 : !========================================================================================
165 :
166 1536 : subroutine spmd_readnl(nlfilename)
167 :
168 : use namelist_utils, only: find_group_name
169 : use units, only: getunit, freeunit
170 : use spmd_utils, only: mstrid=>masterprocid, mpi_integer, mpi_logical,&
171 : mpi_success
172 : ! args
173 : character(len=*), intent(in) :: nlfilename
174 :
175 : ! Local variables
176 : integer :: ierr ! error code
177 : integer :: unitn ! namelist unit number
178 :
179 : namelist /spmd_fv_inparm/ npr_yz, &
180 : geopktrans, geopkblocks, &
181 : force_2d, modcomm_transpose, &
182 : modcomm_geopk, modcomm_gatscat, &
183 : modc_sw_dynrun, modc_hs_dynrun, &
184 : modc_send_dynrun, modc_mxreq_dynrun, &
185 : modc_sw_cdcore, modc_hs_cdcore, &
186 : modc_send_cdcore, modc_mxreq_cdcore, &
187 : modc_sw_gather, modc_hs_gather, &
188 : modc_send_gather, modc_mxreq_gather, &
189 : modc_sw_scatter, modc_hs_scatter, &
190 : modc_send_scatter, modc_mxreq_scatter, &
191 : modc_sw_tracer, modc_hs_tracer, &
192 : modc_send_tracer, modc_mxreq_tracer, &
193 : modc_onetwo, modc_tracers, &
194 : fv_ct_overlap, fv_trac_decomp
195 :
196 : character(len=*), parameter :: sub = "spmd_readnl"
197 :
198 : type(t_fvdycore_grid), pointer :: grid
199 :
200 : integer :: color, ierror, ntemp
201 : integer :: twod_decomp
202 : integer :: mpicom_yz ! communicator for yz decomposition
203 : integer :: mpicom_nyz ! communicator for multiple yz decomposition
204 : integer :: mpicom_xy ! communicator for xy decomposition
205 : !----------------------------------------------------------------------
206 :
207 : ! Default 1D domain decomposition
208 1536 : npr_yz(1) = npes
209 1536 : npr_yz(2) = 1
210 1536 : npr_yz(3) = 1
211 1536 : npr_yz(4) = npes
212 :
213 1536 : if (masterproc) then
214 2 : write(iulog,*) sub//': Read in spmd_fv_inparm namelist from: ', trim(nlfilename)
215 2 : unitn = getunit()
216 2 : open( unitn, file=trim(nlfilename), status='old' )
217 :
218 : ! Look for spmd_fv_inparm group name in the input file. If found, leave the
219 : ! file positioned at that namelist group.
220 2 : call find_group_name(unitn, 'spmd_fv_inparm', status=ierr)
221 2 : if (ierr == 0) then ! found spmd_fv_inparm
222 2 : read(unitn, spmd_fv_inparm, iostat=ierr) ! read the spmd_fv_inparm namelist group
223 2 : if (ierr /= 0) then
224 0 : call endrun(sub//': ERROR reading namelist spmd_fv_inparm')
225 : end if
226 : end if
227 2 : close( unitn )
228 2 : call freeunit( unitn )
229 : endif
230 :
231 1536 : call mpi_bcast(npr_yz, 4, mpi_integer, mstrid, mpicom, ierr)
232 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: npr_yz")
233 :
234 1536 : call mpi_bcast(geopktrans, 1, mpi_integer, mstrid, mpicom, ierr)
235 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: geopktrans")
236 :
237 1536 : call mpi_bcast(geopkblocks, 1, mpi_integer, mstrid, mpicom, ierr)
238 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: geopkblocks")
239 :
240 1536 : call mpi_bcast(force_2d, 1, mpi_integer, mstrid, mpicom, ierr)
241 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: force_2d")
242 :
243 1536 : call mpi_bcast(modcomm_transpose, 1, mpi_integer, mstrid, mpicom, ierr)
244 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modcomm_transpose")
245 :
246 1536 : call mpi_bcast(modcomm_geopk, 1, mpi_integer, mstrid, mpicom, ierr)
247 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modcomm_geopk")
248 :
249 1536 : call mpi_bcast(modcomm_gatscat, 1, mpi_integer, mstrid, mpicom, ierr)
250 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modcomm_gatscat")
251 :
252 1536 : call mpi_bcast(modc_sw_dynrun, 1, mpi_integer, mstrid, mpicom, ierr)
253 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_sw_dynrun")
254 :
255 1536 : call mpi_bcast(modc_hs_dynrun, 1, mpi_logical, mstrid, mpicom, ierr)
256 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_hs_dynrun")
257 :
258 1536 : call mpi_bcast(modc_send_dynrun, 1, mpi_logical, mstrid, mpicom, ierr)
259 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_send_dynrun")
260 :
261 1536 : call mpi_bcast(modc_mxreq_dynrun, 1, mpi_integer, mstrid, mpicom, ierr)
262 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_mxreq_dynrun")
263 :
264 1536 : call mpi_bcast(modc_sw_cdcore, 1, mpi_integer, mstrid, mpicom, ierr)
265 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_sw_cdcore")
266 :
267 1536 : call mpi_bcast(modc_hs_cdcore, 1, mpi_logical, mstrid, mpicom, ierr)
268 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_hs_cdcore")
269 :
270 1536 : call mpi_bcast(modc_send_cdcore, 1, mpi_logical, mstrid, mpicom, ierr)
271 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_send_cdcore")
272 :
273 1536 : call mpi_bcast(modc_mxreq_cdcore, 1, mpi_integer, mstrid, mpicom, ierr)
274 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_mxreq_cdcore")
275 :
276 1536 : call mpi_bcast(modc_sw_gather, 1, mpi_integer, mstrid, mpicom, ierr)
277 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_sw_gather")
278 :
279 1536 : call mpi_bcast(modc_hs_gather, 1, mpi_logical, mstrid, mpicom, ierr)
280 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_hs_gather")
281 :
282 1536 : call mpi_bcast(modc_send_gather, 1, mpi_logical, mstrid, mpicom, ierr)
283 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_send_gather")
284 :
285 1536 : call mpi_bcast(modc_mxreq_gather, 1, mpi_integer, mstrid, mpicom, ierr)
286 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_mxreq_gather")
287 :
288 1536 : call mpi_bcast(modc_sw_scatter, 1, mpi_integer, mstrid, mpicom, ierr)
289 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_sw_scatter")
290 :
291 1536 : call mpi_bcast(modc_hs_scatter, 1, mpi_logical, mstrid, mpicom, ierr)
292 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_hs_scatter")
293 :
294 1536 : call mpi_bcast(modc_send_scatter, 1, mpi_logical, mstrid, mpicom, ierr)
295 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_send_scatter")
296 :
297 1536 : call mpi_bcast(modc_mxreq_scatter, 1, mpi_integer, mstrid, mpicom, ierr)
298 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_mxreq_scatter")
299 :
300 1536 : call mpi_bcast(modc_sw_tracer, 1, mpi_integer, mstrid, mpicom, ierr)
301 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_sw_tracer")
302 :
303 1536 : call mpi_bcast(modc_hs_tracer, 1, mpi_logical, mstrid, mpicom, ierr)
304 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_hs_tracer")
305 :
306 1536 : call mpi_bcast(modc_send_tracer, 1, mpi_logical, mstrid, mpicom, ierr)
307 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_send_tracer")
308 :
309 1536 : call mpi_bcast(modc_mxreq_tracer, 1, mpi_integer, mstrid, mpicom, ierr)
310 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_mxreq_tracer")
311 :
312 1536 : call mpi_bcast(modc_onetwo, 1, mpi_integer, mstrid, mpicom, ierr)
313 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_onetwo")
314 :
315 1536 : call mpi_bcast(modc_tracers, 1, mpi_integer, mstrid, mpicom, ierr)
316 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_tracers")
317 :
318 1536 : call mpi_bcast(fv_ct_overlap, 1, mpi_integer, mstrid, mpicom, ierr)
319 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: fv_ct_overlap")
320 :
321 1536 : call mpi_bcast(fv_trac_decomp, 1, mpi_integer, mstrid, mpicom, ierr)
322 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: fv_trac_decomp")
323 :
324 : ! Put namelist input into the grid object
325 1536 : grid => get_dyn_state_grid()
326 :
327 1536 : npr_y = npr_yz(1)
328 1536 : npr_z = npr_yz(2)
329 1536 : nprxy_x = npr_yz(3)
330 1536 : nprxy_y = npr_yz(4)
331 1536 : npes_yz = npr_y*npr_z
332 1536 : npes_xy = nprxy_x*nprxy_y
333 1536 : if (npes_yz < 1) then
334 0 : call endrun(sub//': ERROR: yz domain decomposition must have at least 1 subdomain')
335 : endif
336 1536 : if (npes_yz > npes) then
337 0 : call endrun(sub//': ERROR: incorrect yz domain decomposition')
338 : endif
339 1536 : if (npes_xy > npes) then
340 0 : call endrun(sub//': ERROR: incorrect xy domain decomposition')
341 : endif
342 :
343 1536 : grid%npr_y = npr_y
344 1536 : grid%npr_z = npr_z
345 1536 : grid%nprxy_x = nprxy_x
346 1536 : grid%nprxy_y = nprxy_y
347 1536 : grid%npes_xy = npes_xy
348 1536 : grid%npes_yz = npes_yz
349 :
350 1536 : grid%ct_overlap = fv_ct_overlap
351 1536 : grid%trac_decomp = fv_trac_decomp
352 :
353 1536 : if (fv_ct_overlap .ne. 0 .and. npes .lt. 2*npes_yz) then
354 0 : call endrun(sub//': ERROR: Not enough processes to overlap cd_core and trac2d')
355 : end if
356 :
357 1536 : if (fv_trac_decomp .le. 0) then
358 0 : call endrun(sub//': ERROR: fv_trac_decomp improperly initialized')
359 : end if
360 :
361 1536 : if (npes .lt. fv_trac_decomp*npes_yz) then
362 0 : call endrun(sub//': ERROR: Not enough processes to decompose tracers')
363 : endif
364 :
365 1536 : if (fv_ct_overlap .gt. 0 .and. fv_trac_decomp .gt. 1) then
366 0 : call endrun(sub//': ERROR: Cannot simultaneously overlap cd_core/trac2d and decompose tracers')
367 : endif
368 :
369 : ! Tracer decomposition limits
370 6144 : allocate(grid%ktloa(fv_trac_decomp), grid%kthia(fv_trac_decomp))
371 3072 : grid%ktloa(:) = 1
372 3072 : grid%kthia(:) = pcnst
373 1536 : grid%ktlo = 1
374 1536 : grid%kthi = pcnst
375 :
376 1536 : grid%commdyn = mpicom
377 1536 : grid%iam = iam
378 :
379 : #ifdef SPMD
380 1536 : myid_z = iam/npr_y
381 1536 : myid_y = iam - myid_z*npr_y
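: ! (editor's note) Ranks map to YZ coordinates with Y varying fastest:
: ! e.g. npr_y = 4, npr_z = 2 puts task iam = 5 at myid_z = 1, myid_y = 1.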
382 1536 : color = iam/npes_yz
383 1536 : call mpi_comm_split(mpicom, color, iam, mpicom_yz, ierror)
384 1536 : if (ierror /= mpi_success) then
385 0 : write(iulog,*) sub//': ERROR: mpi_comm_split_yz failed with IER=', ierror
386 0 : call endrun(sub//': ERROR: mpi_comm_split_yz failed')
387 : end if
388 1536 : call mpi_comm_size(mpicom_yz, ntemp, ierror)
389 1536 : if (iam .lt. npes_yz .and. ntemp .ne. npes_yz) then
390 0 : write(iulog,*) sub//': ERROR: mpicom_yz has incorrect size of ', ntemp
391 0 : call endrun(sub//': ERROR: mpicom_yz has incorrect size')
392 : end if
393 :
394 1536 : if (fv_ct_overlap .gt. 0 .or. fv_trac_decomp .gt. 1) then
395 : ! These are mutually exclusive options
396 0 : if ((fv_ct_overlap .gt. 0 .and. iam .lt. 2*npes_yz) .or. &
397 : (fv_trac_decomp .gt. 1 .and. iam .lt. fv_trac_decomp*npes_yz)) then
398 0 : color = 1
399 : else
400 0 : color = 0
401 : endif
402 0 : call mpi_comm_split(mpicom, color, iam, mpicom_nyz, ierror)
403 0 : if (ierror /= mpi_success) then
404 0 : write (iulog,*) sub//': ERROR: mpi_comm_split_nyz failed with IER=', ierror
405 0 : call endrun(sub//': ERROR: mpi_comm_split_nyz failed')
406 : endif
407 : else
408 1536 : mpicom_nyz = mpicom_yz
409 : endif
410 :
411 1536 : myidxy_y = iam/nprxy_x
412 1536 : myidxy_x = iam - myidxy_y*nprxy_x
413 1536 : color = iam/npes_xy
414 1536 : call mpi_comm_split(mpicom, color, iam, mpicom_xy, ierror)
415 1536 : if (ierror /= mpi_success) then
416 0 : write(iulog,*) sub//': ERROR: mpi_comm_split_xy failed with IER=', ierror
417 0 : call endrun(sub//': ERROR: mpi_comm_split_xy failed')
418 : endif
419 1536 : call mpi_comm_size(mpicom_xy, ntemp, ierror)
420 1536 : if (iam .lt. npes_xy .and. ntemp .ne. npes_xy) then
421 0 : write(iulog,*) sub//': ERROR: mpicom_xy has incorrect size of ', ntemp
422 0 : call endrun(sub//': ERROR: mpicom_xy has incorrect size')
423 : endif
424 :
425 1536 : grid%myid_z = myid_z
426 1536 : grid%myid_y = myid_y
427 1536 : grid%myidxy_y = myidxy_y
428 1536 : grid%myidxy_x = myidxy_x
429 :
430 1536 : grid%commyz = mpicom_yz
431 1536 : grid%commnyz = mpicom_nyz
432 1536 : grid%commxy = mpicom_xy
433 : #endif
434 :
435 1536 : twod_decomp = 0
436 1536 : if (npr_z > 1 .or. nprxy_x > 1 .or. force_2d .eq. 1) then
437 1536 : twod_decomp = 1
438 : endif
439 1536 : grid%twod_decomp = twod_decomp
440 :
441 1536 : if (geopktrans .ne. 0) geopkdist = .true.
442 1536 : if (geopktrans .eq. 1) geopk16byte = .true.
443 : #ifdef NO_CRAY_POINTERS
444 : if (geopk16byte) then
445 : call endrun (sub//': ERROR: cannot use geopk16 unless compiler supports cray pointers')
446 : end if
447 : #endif
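: ! (editor's note) geopktrans thus selects the geopotential method:
: ! 0 keeps the transpose-based computation, 1 selects the Z-parallel
: ! distributed method (geopk16byte), and any other nonzero value selects
: ! the Z-serial pipeline (geopkdist without geopk16byte).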
448 :
449 1536 : geopkblocks = max(1,geopkblocks)
450 :
451 1536 : grid%geopkdist = geopkdist
452 1536 : grid%geopk16byte = geopk16byte
453 1536 : grid%geopkblocks = geopkblocks
454 1536 : grid%mod_method = modcomm_transpose
455 1536 : grid%mod_geopk = modcomm_geopk
456 1536 : grid%mod_gatscat = modcomm_gatscat
457 :
458 1536 : if (modc_sw_dynrun > 0 .and. modcomm_transpose > 0) then
459 0 : call endrun(sub//': ERROR: modc_sw_dynrun and modcomm_transpose are inconsistent')
460 : endif
461 :
462 1536 : grid%modc_dynrun(1) = modc_sw_dynrun
463 4608 : grid%modc_dynrun(2:3) = 0
464 1536 : if (modc_hs_dynrun) grid%modc_dynrun(2) = 1
465 1536 : if (modc_send_dynrun) grid%modc_dynrun(3) = 1
466 1536 : grid%modc_dynrun(4) = modc_mxreq_dynrun
467 :
468 1536 : if (modc_sw_cdcore > 0 .and. (modcomm_transpose > 0 .or. &
469 : (modcomm_geopk > 0 .and. geopk16byte))) then
470 0 : call endrun(sub//': ERROR: modc_sw_cdcore is inconsistent with modcomm_transpose/modcomm_geopk')
471 : endif
472 :
473 1536 : grid%modc_cdcore(1) = modc_sw_cdcore
474 4608 : grid%modc_cdcore(2:3) = 0
475 1536 : if (modc_hs_cdcore) grid%modc_cdcore(2) = 1
476 1536 : if (modc_send_cdcore) grid%modc_cdcore(3) = 1
477 1536 : grid%modc_cdcore(4) = modc_mxreq_cdcore
478 :
479 1536 : if (modc_sw_gather > 0 .and. modcomm_gatscat > 0) then
480 0 : call endrun(sub//': ERROR: modc_sw_gather and modcomm_gatscat are inconsistent')
481 : endif
482 :
483 1536 : grid%modc_gather(1) = modc_sw_gather
484 4608 : grid%modc_gather(2:3) = 0
485 1536 : if (modc_hs_gather) grid%modc_gather(2) = 1
486 1536 : if (modc_send_gather) grid%modc_gather(3) = 1
487 1536 : grid%modc_gather(4) = modc_mxreq_gather
488 :
489 1536 : if (modc_sw_scatter > 0 .and. modcomm_gatscat > 0) then
490 0 : call endrun(sub//': ERROR: modc_sw_scatter and modcomm_gatscat are inconsistent')
491 : endif
492 :
493 1536 : grid%modc_scatter(1) = modc_sw_scatter
494 4608 : grid%modc_scatter(2:3) = 0
495 1536 : if (modc_hs_scatter) grid%modc_scatter(2) = 1
496 1536 : if (modc_send_scatter) grid%modc_scatter(3) = 1
497 1536 : grid%modc_scatter(4) = modc_mxreq_scatter
498 :
499 1536 : if (modc_sw_tracer > 0 .and. modcomm_transpose > 0) then
500 0 : call endrun(sub//': ERROR: modc_sw_tracer and modcomm_transpose are inconsistent')
501 : endif
502 :
503 1536 : grid%modc_tracer(1) = modc_sw_tracer
504 4608 : grid%modc_tracer(2:3) = 0
505 1536 : if (modc_hs_tracer) grid%modc_tracer(2) = 1
506 1536 : if (modc_send_tracer) grid%modc_tracer(3) = 1
507 1536 : grid%modc_tracer(4) = modc_mxreq_tracer
508 :
509 1536 : if (modc_tracers < 0) then
510 0 : call endrun(sub//': ERROR: inadmissible value of modc_tracers')
511 : endif
512 :
513 1536 : grid%modc_onetwo = modc_onetwo
514 1536 : grid%modc_tracers = modc_tracers
515 :
516 1536 : if (masterproc) then
517 2 : write(iulog,*)'FV grid decomposition:'
518 2 : write(iulog,*)' npr_y = ', npr_y, ' npr_z = ', npr_z
519 2 : write(iulog,*)' nprxy_x = ', nprxy_x, ' nprxy_y = ', nprxy_y
520 2 : write(iulog,*)' npes = ', npes, ' npes_yz= ', npes_yz, ' npes_xy = ', npes_xy
521 :
522 2 : if (npes > 1) then
523 :
524 2 : if (fv_ct_overlap .ne. 0) then
525 0 : write(iulog,*)' Overlapping tracer and dynamics subcycles'
526 : endif
527 :
528 2 : write(iulog,*)' Decomposing tracers into ', fv_trac_decomp, ' groups'
529 :
530 2 : if (twod_decomp == 0) then
531 0 : write(iulog,*)' decomposition is effectively 1D - skipping transposes'
532 : else
533 2 : write(iulog,*)' using multi-2d decomposition methodology'
534 : end if
535 :
536 2 : write(iulog,*)' non-transpose geopk communication method = ', geopkdist
537 2 : write(iulog,*)' Z-parallel non-transpose geopk communication method = ', geopk16byte
538 :
539 2 : if (geopkdist .and. (.not. geopk16byte)) then
540 0 : write(iulog,*)' number of stages in Z-serial non-transpose geopk method = ', geopkblocks
541 : endif
542 :
543 2 : write(iulog,*)' modcomm transpose method = ', modcomm_transpose
544 2 : write(iulog,*)' modcomm geopk method = ', modcomm_geopk
545 2 : write(iulog,*)' modcomm gatscat method = ', modcomm_gatscat
546 :
547 2 : write(iulog,*)' modc_sw_dynrun = ', modc_sw_dynrun
548 2 : write(iulog,*)' modc_hs_dynrun = ', modc_hs_dynrun
549 2 : write(iulog,*)' modc_send_dynrun = ', modc_send_dynrun
550 2 : write(iulog,*)' modc_mxreq_dynrun = ', modc_mxreq_dynrun
551 :
552 2 : write(iulog,*)' modc_sw_cdcore = ', modc_sw_cdcore
553 2 : write(iulog,*)' modc_hs_cdcore = ', modc_hs_cdcore
554 2 : write(iulog,*)' modc_send_cdcore = ', modc_send_cdcore
555 2 : write(iulog,*)' modc_mxreq_cdcore = ', modc_mxreq_cdcore
556 :
557 2 : write(iulog,*)' modc_sw_gather = ', modc_sw_gather
558 2 : write(iulog,*)' modc_hs_gather = ', modc_hs_gather
559 2 : write(iulog,*)' modc_send_gather = ', modc_send_gather
560 2 : write(iulog,*)' modc_mxreq_gather = ', modc_mxreq_gather
561 :
562 2 : write(iulog,*)' modc_sw_scatter = ', modc_sw_scatter
563 2 : write(iulog,*)' modc_hs_scatter = ', modc_hs_scatter
564 2 : write(iulog,*)' modc_send_scatter = ', modc_send_scatter
565 2 : write(iulog,*)' modc_mxreq_scatter = ', modc_mxreq_scatter
566 :
567 2 : write(iulog,*)' modc_sw_tracer = ', modc_sw_tracer
568 2 : write(iulog,*)' modc_hs_tracer = ', modc_hs_tracer
569 2 : write(iulog,*)' modc_send_tracer = ', modc_send_tracer
570 2 : write(iulog,*)' modc_mxreq_tracer = ', modc_mxreq_tracer
571 :
572 2 : write(iulog,*)' modc_onetwo = ', modc_onetwo
573 2 : write(iulog,*)' modc_tracers = ', modc_tracers
574 :
575 : end if
576 :
577 : end if
578 :
579 1536 : end subroutine spmd_readnl
580 :
581 : !========================================================================================
582 :
583 1536 : subroutine spmdinit_dyn(jord, grid)
584 :
585 : !-----------------------------------------------------------------------
586 : ! Initialize grid decomposition.
587 : !
588 : ! !REVISION HISTORY:
589 : ! 00.09.30 Sawyer Added LR-specific initialization
590 : ! 01.06.27 Mirin Secondary 2-D xy decomposition
591 : ! 01.10.16 Sawyer Added Y at each Z decompositions
592 : ! 03.07.22 Sawyer Removed decomps used by highp2
593 : !-----------------------------------------------------------------------
594 :
595 : #ifdef SPMD
596 : use parutilitiesmodule, only: parinit, gid, parcollective, maxop
597 : use dynamics_vars, only: spmd_vars_init
598 : #endif
599 :
600 : ! arguments
601 : integer, intent(in) :: jord
602 : type(t_fvdycore_grid), pointer :: grid
603 :
604 : ! local variables:
605 :
606 : integer, parameter :: numbnd = 0 ! no. of latitudes passed N and S of forecast lat
607 :
608 : integer :: beglat
609 : integer :: endlat
610 : integer :: beglev
611 : integer :: endlev
612 :
613 : integer :: mod_maxirr
614 :
615 : integer :: procid ! processor id
616 : integer :: procids ! processor id, walking toward lower task indices
617 : integer :: procidn ! processor id, walking toward higher task indices
618 :
619 : integer :: j, k
620 : integer :: lat
621 : integer :: vert
622 : integer :: lonn
623 : integer :: workleft ! amount of work still to be parcelled out
624 :
625 : integer :: isum ! running total of work parcelled out
626 : integer :: smostlat ! southern-most latitude index
627 : integer :: nmostlat ! northern-most latitude index
628 :
629 1536 : integer, allocatable :: ydist(:) ! number of lats per subdomain in YZ decomp
630 1536 : integer, allocatable :: zdist(:) ! number of levels per subdomain in YZ decomp
631 :
632 : integer :: beglonxy, endlonxy
633 : integer :: beglatxy, endlatxy
634 1536 : integer, allocatable :: xdistxy(:) ! number of xy-longs per subdomain
635 1536 : integer, allocatable :: ydistxy(:) ! number of xy-lats per subdomain
636 :
637 1536 : integer, allocatable :: tmp(:)
638 1536 : integer, allocatable :: jmyz(:), kmyz(:) ! per-subdomain YZ extents (passed to spmd_vars_init)
639 1536 : integer, allocatable :: imxy(:), jmxy(:) ! per-subdomain XY extents (passed to spmd_vars_init)
640 :
641 : character(len=*), parameter :: sub = "spmdinit_dyn"
642 : !---------------------------------------------------------------------------
643 :
644 : ! Default YZ decomposition is 1D
645 1536 : beglev = 1
646 1536 : endlev = plev
647 :
648 1536 : mod_maxirr = max(modc_onetwo, modc_tracers)
649 :
650 : #ifdef SPMD
651 :
652 1536 : spmd_on = 1
653 :
654 : ! Initialize PILGRIM library
655 : call parinit(comm=mpicom, &
656 : npryzxy = (/ npr_y, npr_z, nprxy_x, nprxy_y /), &
657 : mod_method = modcomm_transpose, &
658 : mod_geopk = modcomm_geopk, &
659 : mod_maxirr = mod_maxirr, &
660 7680 : mod_gatscat = modcomm_gatscat )
661 :
662 : ! Compute Y partition for YZ decomposition
663 :
664 4608 : allocate(ydist (npr_y))
665 4608 : allocate(nlat_p (0:npes-1))
666 4608 : allocate(cut (2,0:npes-1))
667 :
668 99840 : ydist(:) = 0
669 1181184 : nlat_p(:) = 0
670 1181184 : cut(1,:) = -1
671 1181184 : cut(2,:) = -2
672 :
673 1536 : lat = plat / npr_y
674 1536 : workleft = plat - lat * npr_y
675 1536 : if (lat < 3) then
676 0 : call endrun(sub//': ERROR: less than 3 latitudes per subdomain')
677 : end if
678 :
679 : ! Be careful: ydist is 1-based. CAM's arrays, e.g., cut, are 0-based
680 :
681 99840 : do procid = 1, npr_y
682 99840 : ydist(procid) = lat
683 : end do
684 :
685 1536 : if ( workleft /= 0 ) then
686 0 : procids = (npr_y+1) / 2
687 0 : procidn = procids + 1
688 0 : do while ( workleft /= 0 )
689 0 : if ( procids == 1 ) procids = npr_y
690 0 : ydist(procids) = ydist(procids) + 1
691 0 : workleft = workleft - 1
692 0 : if ( workleft /= 0 ) then
693 0 : ydist(procidn) = ydist(procidn) + 1
694 0 : workleft = workleft - 1
695 : end if
696 0 : procidn = procidn + 1
697 0 : procids = procids - 1
698 : end do
699 : end if
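: ! Worked example (editor's note): for plat = 96 and npr_y = 7, lat = 13
: ! and workleft = 5; the loop above hands the extra latitudes outward from
: ! the middle, giving ydist = (13,14,14,14,14,14,13), so the polar-most
: ! subdomains carry the lighter load.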
700 :
701 : ! Safety checks:
702 99840 : if (sum(ydist) /= plat) then
703 0 : write(iulog,*) sub//': ERROR: sum(ydist)=', sum(ydist),' not equal to plat'
704 0 : call endrun(sub//': ERROR: sum(ydist) not equal to plat')
705 : end if
706 1536 : if (workleft/=0) then
707 0 : write(iulog,*) sub//': ERROR: workleft for ydist not zero. workleft=', workleft
708 0 : call endrun(sub//': ERROR: workleft for ydist not zero')
709 : end if
710 :
711 : ! Set the CAM data structures
712 1536 : lat = 0
713 99840 : do procid = 0, npr_y-1
714 98304 : cut(1,procid) = lat + 1
715 98304 : lat = lat + ydist(procid+1)
716 98304 : cut(2,procid) = lat
717 98304 : nlat_p(procid) = ydist(procid+1)
718 :
719 98304 : if (masterproc) then
720 128 : write(iulog,*) 'nlat_p(',procid,') = ', nlat_p(procid)
721 : end if
722 :
723 99840 : if (myid_y == procid) then
724 1536 : beglat = cut(1,myid_y)
725 1536 : endlat = cut(2,myid_y)
726 1536 : numlats = ydist(procid+1)
727 : end if
728 : end do
729 :
730 18432 : do k = 1, npr_z-1
731 1099776 : do j = 0, npr_y-1
732 1081344 : procid = j + k*npr_y
733 1081344 : cut(1,procid) = cut(1,j)
734 1081344 : cut(2,procid) = cut(2,j)
735 1098240 : nlat_p(procid) = nlat_p(j)
736 : end do
737 : end do
738 :
739 1536 : proc(:) = 0
740 1181184 : do procid = 0, npr_y*npr_z-1
741 :
742 : ! Determine which processor is responsible for the defined latitudes
743 4720128 : do lat = cut(1,procid), cut(2,procid)
744 4718592 : proc(lat) = procid
745 : end do
746 : end do
747 :
748 : ! Compute Z partition for YZ decomposition
749 :
750 4608 : allocate(zdist((npes-1)/npr_y+1))
751 :
752 19968 : zdist(:) = 0
753 :
754 1536 : vert = plev / npr_z
755 1536 : workleft = plev - vert * npr_z
756 1536 : if (vert < 1) then
757 0 : call endrun(sub//': ERROR: less than 1 vertical levels per subdomain')
758 : end if
759 :
760 19968 : do procid = 1, npr_z
761 19968 : zdist(procid) = vert
762 : end do
763 :
764 1536 : if (workleft /= 0) then
765 1536 : procids = (npr_z+1) / 2
766 1536 : procidn = procids + 1
767 9216 : do while ( workleft /= 0 )
768 7680 : if (procids == 1) procids = npr_z
769 7680 : zdist(procids) = zdist(procids) + 1
770 7680 : workleft = workleft - 1
771 7680 : if (workleft /= 0) then
772 7680 : zdist(procidn) = zdist(procidn) + 1
773 7680 : workleft = workleft - 1
774 : end if
775 7680 : procidn = procidn + 1
776 7680 : procids = procids - 1
777 : end do
778 : end if
779 :
780 : ! Safety checks:
781 19968 : if (sum(zdist) /= plev) then
782 0 : write(iulog,*) sub//': ERROR: sum(zdist)=', sum(zdist),' not equal to plev'
783 0 : call endrun(sub//': ERROR: sum(zdist) not equal to plev')
784 : endif
785 1536 : if (workleft/=0) then
786 0 : write(iulog,*) sub//': ERROR: workleft for zdist not zero. workleft=', workleft
787 0 : call endrun(sub//': ERROR: workleft for zdist not zero')
788 : end if
789 :
790 : ! Compute local limits
791 1536 : call locallimits(myid_z, zdist, beglev, endlev)
792 :
793 : ! Auxiliary processes only
794 1536 : if (iam >= npes_yz) then
795 0 : beglat = 1
796 0 : endlat = 0
797 0 : numlats = 0
798 0 : beglev = 1
799 0 : endlev = 0
800 : end if
801 :
802 1536 : grid%jfirst = beglat
803 1536 : grid%jlast = endlat
804 1536 : grid%kfirst = beglev
805 1536 : grid%klast = endlev
806 1536 : if (endlev == plev) then
807 128 : grid%klastp = plev+1
808 : else
809 1408 : grid%klastp = endlev
810 : end if
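: ! (editor's note, an interpretation) klastp extends klast by one on the
: ! tasks that own the bottom level, so fields defined on the plev+1
: ! interface levels fit in the local vertical slab.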
811 :
812 : ! Compute X partition for XY decomposition
813 :
814 4608 : allocate(xdistxy(nprxy_x))
815 19968 : xdistxy(:) = 0
816 :
817 1536 : lonn = plon / nprxy_x
818 1536 : workleft = plon - lonn * nprxy_x
819 1536 : if (lonn < 3) then
820 0 : call endrun(sub//': ERROR: less than 3 longitudes per XY subdomain')
821 : end if
822 :
823 19968 : do procid = 1, nprxy_x
824 19968 : xdistxy(procid) = lonn
825 : enddo
826 :
827 1536 : if (workleft /= 0) then
828 0 : procids = (nprxy_x+1) / 2
829 0 : procidn = procids + 1
830 0 : do while (workleft /= 0)
831 0 : if (procids == 1) procids = nprxy_x
832 0 : xdistxy(procids) = xdistxy(procids) + 1
833 0 : workleft = workleft - 1
834 0 : if (workleft /= 0) then
835 0 : xdistxy(procidn) = xdistxy(procidn) + 1
836 0 : workleft = workleft - 1
837 : end if
838 0 : procidn = procidn + 1
839 0 : procids = procids - 1
840 : end do
841 : end if
842 :
843 : ! Safety check:
844 19968 : if ( sum(xdistxy) /= plon ) then
845 0 : write(iulog,*) sub//': ERROR: sum(xdistxy)=', sum(xdistxy),' not equal to plon'
846 0 : call endrun(sub//': ERROR: sum(xdistxy) not equal to plon ')
847 : end if
848 1536 : if (workleft/=0) then
849 0 : write(iulog,*) sub//': ERROR: workleft for xdistxy not zero. workleft=',workleft
850 0 : call endrun(sub//': ERROR: workleft for xdistxy not zero')
851 : end if
852 :
853 : ! Compute local limits
854 1536 : call locallimits(myidxy_x, xdistxy, beglonxy, endlonxy)
855 :
856 : ! Compute global array for use in dyn_grid module
857 4608 : allocate (lonrangexy(2,nprxy_x))
858 1536 : lonrangexy(1,1) = 1
859 1536 : lonrangexy(2,1) = xdistxy(1)
860 18432 : do procid = 2, nprxy_x
861 16896 : lonrangexy(1,procid) = lonrangexy(2,procid-1) + 1
862 18432 : lonrangexy(2,procid) = lonrangexy(1,procid) + xdistxy(procid) - 1
863 : end do
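: ! e.g. (editor's note) xdistxy = (3,3,2) gives lonrangexy = (1,3), (4,6),
: ! (7,8): each column holds the first and last global longitude index
: ! owned by that X subdomain.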
864 :
865 : ! Compute Y partition for XY decomposition
866 :
867 4608 : allocate(ydistxy((npes-1)/nprxy_x+1))
868 99840 : ydistxy(:) = 0
869 :
870 1536 : lat = plat / nprxy_y
871 1536 : workleft = plat - lat * nprxy_y
872 1536 : if (lat < 3) then
873 0 : call endrun(sub//': ERROR: less than 3 latitudes per XY subdomain')
874 : end if
875 :
876 99840 : do procid = 1, nprxy_y
877 99840 : ydistxy(procid) = lat
878 : end do
879 :
880 1536 : if (workleft /= 0) then
881 0 : procids = (nprxy_y+1) / 2
882 0 : procidn = procids + 1
883 0 : do while (workleft /= 0)
884 0 : if (procids == 1) procids = nprxy_y
885 0 : ydistxy(procids) = ydistxy(procids) + 1
886 0 : workleft = workleft - 1
887 0 : if (workleft /= 0) then
888 0 : ydistxy(procidn) = ydistxy(procidn) + 1
889 0 : workleft = workleft - 1
890 : end if
891 0 : procidn = procidn + 1
892 0 : procids = procids - 1
893 : end do
894 : end if
895 :
896 : ! Safety check:
897 99840 : if (sum(ydistxy) /= plat) then
898 0 : write(iulog,*) sub//': ERROR: sum(ydistxy)=', sum(ydistxy),' not equal to plat'
899 0 : call endrun(sub//': ERROR: sum(ydistxy) not equal to plat')
900 : end if
901 1536 : if (workleft/=0) then
902 0 : write(iulog,*) sub//': ERROR: workleft for ydistxy not zero. workleft=',workleft
903 0 : call endrun(sub//': ERROR: workleft for ydistxy not zero')
904 : end if
905 :
906 : ! Compute local limits
907 1536 : call locallimits(myidxy_y, ydistxy, beglatxy, endlatxy)
908 :
909 : ! Auxiliary processes only
910 1536 : if (iam >= npes_xy) then
911 0 : beglonxy = 1
912 0 : endlonxy = 0
913 0 : beglatxy = 1
914 0 : endlatxy = 0
915 : end if
916 :
917 1536 : grid%ifirstxy = beglonxy
918 1536 : grid%ilastxy = endlonxy
919 1536 : grid%jfirstxy = beglatxy
920 1536 : grid%jlastxy = endlatxy
921 :
922 : ! Compute global array for use in dyn_grid module
923 4608 : allocate (latrangexy(2,nprxy_y))
924 1536 : latrangexy(1,1) = 1
925 1536 : latrangexy(2,1) = ydistxy(1)
926 98304 : do procid = 2, nprxy_y
927 96768 : latrangexy(1,procid) = latrangexy(2,procid-1) + 1
928 98304 : latrangexy(2,procid) = latrangexy(1,procid) + ydistxy(procid) - 1
929 : end do
930 :
931 1536 : deallocate(ydist)
932 1536 : deallocate(zdist)
933 :
934 1536 : deallocate(xdistxy)
935 1536 : deallocate(ydistxy)
936 :
937 : ! Calculate the ghost region sizes for the SPMD version (tricky stuff)
938 1536 : grid%ng_c = 2 ! Avoid the case where ng_c = 1
939 1536 : grid%ng_d = min( abs(jord), 3) ! SJL: number of max ghost latitudes
940 1536 : grid%ng_d = max( grid%ng_d, 2)
941 1536 : grid%ng_s = max( grid%ng_c+1, grid%ng_d )
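: ! e.g. (editor's note) jord = 3 gives ng_c = 2, ng_d = 3, ng_s = 3;
: ! jord = 2 gives ng_c = 2, ng_d = 2, ng_s = 3.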
942 :
943 : ! Define imxy, jmxy, jmyz, kmyz from beglonxy, endlonxy, etc.
944 16896 : allocate(tmp(npes), imxy(nprxy_x), jmxy(nprxy_y), jmyz(npr_y), kmyz(npr_z))
945 :
946 1181184 : tmp = 0
947 1536 : tmp(gid+1) = endlonxy - beglonxy + 1
948 1536 : call parcollective( mpicom, maxop, npes, tmp )
949 21504 : imxy(1:nprxy_x) = tmp(1:nprxy_x)
950 :
951 1181184 : tmp = 0
952 1536 : tmp(gid+1) = endlatxy - beglatxy + 1
953 1536 : call parcollective( mpicom, maxop, npes, tmp )
954 99840 : do k = 1, nprxy_y
955 99840 : jmxy(k) = tmp((k-1)*nprxy_x+1)
956 : end do
957 :
958 1181184 : tmp = 0
959 1536 : tmp(gid+1) = grid%jlast - grid%jfirst + 1
960 1536 : call parcollective( mpicom, maxop, npes, tmp )
961 101376 : jmyz(1:grid%npr_y) = tmp(1:grid%npr_y)
962 :
963 1181184 : tmp = 0
964 1536 : tmp(gid+1) = grid%klast - grid%kfirst + 1
965 1536 : call parcollective( mpicom, maxop, npes, tmp )
966 19968 : do k = 1, grid%npr_z
967 19968 : kmyz(k) = tmp((k-1)*grid%npr_y+1)
968 : end do
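: ! (editor's note) Pattern used in all four exchanges above: each task
: ! writes only its own extent into tmp(gid+1), the maxop collective
: ! replicates every extent across tasks, and the strided indexing then
: ! picks one representative task per coordinate value, since extents
: ! repeat along the other decomposition dimension.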
969 :
970 : ! set up the PILGRIM communications
971 1536 : call spmd_vars_init(imxy, jmxy, jmyz, kmyz, grid)
972 :
973 1536 : deallocate(tmp, imxy, jmxy, jmyz, kmyz)
974 :
975 : #endif
976 6144 : end subroutine spmdinit_dyn
977 :
978 : !========================================================================================
979 :
980 0 : subroutine spmdbuf
981 :
982 0 : end subroutine spmdbuf
983 :
984 : !========================================================================================
985 :
986 0 : subroutine compute_gsfactors(numperlat, numtot, numperproc, displs)
987 :
988 : ! Compute arguments for gatherv, scatterv
989 :
990 : ! arguments
991 : integer, intent(in) :: numperlat ! number of elements per latitude
992 : integer, intent(out) :: numtot ! total number of elements (to send or recv)
993 : integer, intent(out) :: numperproc(0:npes-1) ! per-PE number of items to receive
994 : integer, intent(out) :: displs(0:npes-1) ! per-PE displacements
995 :
996 : #ifdef SPMD
997 : ! Local variables
998 : integer :: p
999 : !---------------------------------------------------------------------------
1000 :
1001 0 : numtot = numperlat*numlats
1002 :
1003 0 : do p = 0, npes-1
1004 0 : numperproc(p) = numperlat*nlat_p(p)
1005 : end do
1006 :
1007 0 : displs(:) = 0
1008 0 : do p = 1, npr_y-1
1009 0 : displs(p) = displs(p-1) + numperproc(p-1)
1010 : end do
1011 :
1012 0 : if (npr_z > 1) then
1013 0 : do p = 1, npr_z-1
1014 0 : displs(p*npr_y:(p+1)*npr_y-1) = displs(0:npr_y-1)
1015 : enddo
1016 : endif
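: ! Worked example (editor's note): with npes = 4, npr_y = 2, npr_z = 2,
: ! nlat_p = (3,3,3,3) and numperlat = 1, this gives numperproc = (3,3,3,3)
: ! and displs = (0,3,0,3); the displacements repeat for each Z subdomain
: ! because the gather/scatter acts on the latitude decomposition only.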
1017 : #endif
1018 :
1019 0 : end subroutine compute_gsfactors
1020 :
1021 : !========================================================================================
1022 :
1023 4608 : subroutine locallimits(myidxy, distxy, begdimxy, enddimxy)
1024 :
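: ! Compute the first and last global indices (begdimxy, enddimxy) owned by
: ! task myidxy (0-based), given per-task point counts distxy (1-based).
: ! e.g. (editor's note) distxy = (3,3,2) gives (1,3) for myidxy = 0,
: ! (4,6) for myidxy = 1, and (7,8) for myidxy = 2.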
1025 : integer, intent(in) :: myidxy
1026 : integer, intent(in) :: distxy(:)
1027 : integer, intent(out) :: begdimxy
1028 : integer, intent(out) :: enddimxy
1029 :
1030 : integer :: procid
1031 :
1032 4608 : begdimxy = 1
1033 4608 : enddimxy = distxy(1)
1034 :
1035 69888 : do procid = 1, myidxy
1036 65280 : begdimxy = enddimxy + 1
1037 69888 : enddimxy = begdimxy + distxy(procid+1) - 1
1038 : end do
1039 4608 : end subroutine locallimits
1040 :
1041 : !========================================================================================
1042 :
1043 : end module spmd_dyn
1044 :