Line data Source code
1 : !-----------------------------------------------------------------------
2 : ! $Id$
3 : !===============================================================================
4 : module lapack_wrap
5 :
6 : ! Description:
7 : ! Wrappers for the band diagonal and tridiagonal direct matrix
8 : ! solvers contained in the LAPACK library.
9 :
10 : ! References:
11 : ! LAPACK--Linear Algebra PACKage
12 : ! URL: <http://www.netlib.org/lapack/>
13 : !-----------------------------------------------------------------------
14 : use constants_clubb, only: &
15 : fstderr ! Variable(s)
16 :
17 : use clubb_precision, only: &
18 : core_rknd ! Variable
19 :
20 : implicit none
21 :
22 : ! Simple routines
23 : public :: lapack_tridiag_solve, &
24 : lapack_band_solve
25 :
26 : ! Expert routines
27 : public :: lapack_tridiag_solvex, &
28 : lapack_band_solvex
29 :
30 : private ! Set Default Scope
31 :
32 : contains
33 :
34 : !-----------------------------------------------------------------------
35 0 : subroutine lapack_tridiag_solvex( solve_type, ndim, nrhs, ngrdcol, &
36 0 : lhs, rhs, &
37 0 : soln, rcond )
38 :
39 : ! Description:
40 : ! Solves a tridiagonal system of equations (expert routine).
41 :
42 : ! References:
43 : ! <http://www.netlib.org/lapack/single/sgtsvx.f>
44 : ! <http://www.netlib.org/lapack/double/dgtsvx.f>
45 :
46 : ! Notes:
47 : ! More expensive than the simple routine, but tridiagonal
48 : ! decomposition is still relatively cheap.
49 : !-----------------------------------------------------------------------
50 :
51 : use clubb_precision, only: &
52 : core_rknd ! Variable(s)
53 :
54 : use error_code, only: &
55 : clubb_at_least_debug_level, & ! Procedure
56 : err_code, & ! Error Indicator
57 : clubb_fatal_error ! Constants
58 :
59 : use lapack_interfaces, only: &
60 : lapack_gtsvx, & ! Procedure
61 : lapack_isnan
62 :
63 : implicit none
64 :
65 : intrinsic :: kind
66 :
67 : ! ----------------------- Input variables -----------------------
68 : character(len=*), intent(in) :: &
69 : solve_type ! Used to write a message if this fails
70 :
71 : integer, intent(in) :: &
72 : ndim, & ! N-dimension of matrix
73 : ngrdcol, & ! Number of grid columns
74 : nrhs ! # of right hand sides to back subst. after LU-decomp.
75 :
76 : ! ----------------------- Input/Output variables -----------------------
77 : real( kind = core_rknd ), intent(inout), dimension(3,ngrdcol,ndim) :: &
78 : lhs ! Tridiagonal LHS
79 :
80 : real( kind = core_rknd ), intent(inout), dimension(ngrdcol,ndim,nrhs) :: &
81 : rhs ! RHS input
82 :
83 : ! The estimate of the reciprocal of the condition number on the LHS matrix.
84 : ! If rcond is < machine precision the matrix is singular to working
85 : ! precision, and info == ndim+1. If rcond == 0, then the LHS matrix
86 : ! is singular. This condition is indicated by a return code of info > 0.
87 : real( kind = core_rknd ), dimension(ngrdcol), intent(out) :: rcond
88 :
89 : ! ----------------------- Output variables -----------------------
90 : real( kind = core_rknd ), intent(out), dimension(ngrdcol,ndim,nrhs) :: &
91 : soln ! solution
92 :
93 : ! ----------------------- Local Variables -----------------------
94 : ! These contain the decomposition of the matrix
95 0 : real( kind = core_rknd ), dimension(ndim-1) :: dlf, duf
96 0 : real( kind = core_rknd ), dimension(ndim) :: df
97 0 : real( kind = core_rknd ), dimension(ndim-2) :: du2
98 :
99 : integer, dimension(ndim) :: &
100 0 : ipivot ! Index of pivots done during decomposition
101 :
102 : integer, dimension(ndim) :: &
103 0 : iwork ! `scrap' array
104 :
105 :
106 : real( kind = core_rknd ), dimension(ngrdcol,nrhs) :: &
107 0 : ferr, & ! Forward error estimate
108 0 : berr ! Backward error estimate
109 :
110 : real( kind = core_rknd ), dimension(3*ndim) :: &
111 0 : work ! `Scrap' array
112 :
113 : integer :: info ! Diagnostic output
114 :
115 : integer :: i, n ! Array index
116 :
117 : !-----------------------------------------------------------------------
118 : ! *** The LAPACK Routine ***
119 : ! SUBROUTINE SGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF,
120 : ! $ DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR,
121 : ! $ WORK, IWORK, INFO )
122 : !-----------------------------------------------------------------------
123 :
124 : ! Lapack tridiagonal matrix solver, expert version, sgtsvx for single
125 : ! or dgtsvx for double precision
126 0 : do i = 1, ngrdcol
127 : call lapack_gtsvx( "Not Factored", "No Transpose lhs", ndim, nrhs, &
128 0 : lhs(3,i,2:ndim), lhs(2,i,:), lhs(1,i,1:ndim-1), &
129 : dlf, df, duf, du2, ipivot, &
130 0 : rhs(i,:,:), ndim, soln(i,:,:), ndim, rcond(i), &
131 0 : ferr(i,:), berr(i,:), work, iwork, info )
132 : end do
133 :
134 : ! Print diagnostics for when ferr is large
135 0 : if ( clubb_at_least_debug_level( 2 ) .and. any( ferr > 1.e-3_core_rknd ) ) then
136 :
137 0 : write(fstderr,*) "Warning, large error est. for: " // trim( solve_type )
138 :
139 0 : do n = 1, nrhs
140 0 : do i = 1, ngrdcol
141 0 : write(fstderr,*) "grdcol #", i, "rhs # ", i, "tridiag forward error est. =", ferr(i,n)
142 0 : write(fstderr,*) "grdcol #", i, "rhs # ", i, "tridiag backward error est. =", berr(i,n)
143 : end do
144 : end do
145 :
146 0 : write(fstderr,'(2(a20,e15.6))') "rcond est. = ", rcond, &
147 0 : "machine epsilon = ", epsilon( lhs(1,1,1) )
148 : end if
149 :
150 0 : select case( info )
151 : case( :-1 )
152 : write(fstderr,*) trim( solve_type )// &
153 0 : "illegal value in argument", -info
154 0 : err_code = clubb_fatal_error
155 :
156 : case( 0 )
157 : ! Success
158 0 : do i = 1, ngrdcol
159 0 : if ( lapack_isnan( ndim, nrhs, soln(i,:,:) ) ) then
160 0 : err_code = clubb_fatal_error
161 : end if
162 : end do
163 :
164 : case( 1: )
165 0 : if ( info == ndim+1 ) then
166 : write(fstderr,*) trim( solve_type) // &
167 0 : " Warning: matrix is singular to working precision."
168 : write(fstderr,'(a,e12.5)') &
169 0 : "Estimate of the reciprocal of the condition number: ", rcond
170 : else
171 : write(fstderr,*) solve_type// &
172 0 : " singular matrix."
173 0 : err_code = clubb_fatal_error
174 : end if
175 :
176 : end select
177 :
178 0 : return
179 : end subroutine lapack_tridiag_solvex
180 :
181 : !-----------------------------------------------------------------------
182 45080493 : subroutine lapack_tridiag_solve( solve_type, ndim, nrhs, ngrdcol, &
183 45080493 : lhs, rhs, &
184 45080493 : soln )
185 :
186 : ! Description:
187 : ! Solves a tridiagonal system of equations (simple routine)
188 :
189 : ! References:
190 : ! <http://www.netlib.org/lapack/single/sgtsv.f>
191 : ! <http://www.netlib.org/lapack/double/dgtsv.f>
192 : !-----------------------------------------------------------------------
193 :
194 : use clubb_precision, only: &
195 : core_rknd ! Variable(s)
196 :
197 : #ifdef E3SM
198 : #ifndef NDEBUG
199 : #if defined(ARCH_MIC_KNL) && defined(CPRINTEL)
200 : use, intrinsic :: ieee_exceptions
201 : #endif
202 : #endif
203 : #endif /*E3SM*/
204 :
205 : use error_code, only: &
206 : err_code, & ! Error Indicator
207 : clubb_fatal_error ! Constants
208 :
209 : use lapack_interfaces, only: &
210 : lapack_gtsv, & ! Procedure
211 : lapack_isnan
212 :
213 : implicit none
214 :
215 : intrinsic :: kind
216 :
217 : ! ----------------------- Input variables -----------------------
218 : character(len=*), intent(in) :: &
219 : solve_type ! Used to write a message if this fails
220 :
221 : integer, intent(in) :: &
222 : ndim, & ! N-dimension of matrix
223 : ngrdcol, & ! Number of grid columns
224 : nrhs ! # of right hand sides to back subst. after LU-decomp.
225 :
226 : ! ----------------------- Input/Output Variables -----------------------
227 : real( kind = core_rknd ), intent(inout), dimension(3,ngrdcol,ndim) :: &
228 : lhs ! Tridiagonal LHS input
229 :
230 : real( kind = core_rknd ), intent(inout), dimension(ngrdcol,ndim,nrhs) :: &
231 : rhs ! RHS input
232 :
233 : ! ----------------------- Output variables -----------------------
234 : real( kind = core_rknd ), intent(out), dimension(ngrdcol,ndim,nrhs) :: &
235 : soln ! solution
236 :
237 : ! ----------------------- Local Variables -----------------------
238 : real( kind = core_rknd ), dimension(ndim) :: &
239 : subd, diag, supd
240 :
241 : integer :: &
242 : info, & ! Diagnostic output
243 : i ! Loop var
244 :
245 : !-----------------------------------------------------------------------
246 : ! *** The LAPACK Routine ***
247 : ! SUBROUTINE DGTSV( N, NRHS, DL, D, DU, B, LDB, INFO )
248 : !-----------------------------------------------------------------------
249 :
250 : #ifdef E3SM
251 : #ifndef NDEBUG
252 : #if defined(ARCH_MIC_KNL) && defined(CPRINTEL)
253 : ! when floating-point exceptions are turned on, this call was failing with
254 : ! a div-by-zero on KNL with Intel/MKL. Solution was to turn off exceptions
255 : ! only here at this call (and only for machine with ARCH_MIC_KNL defined)
256 : ! (github 1183)
257 : call ieee_set_halting_mode(IEEE_DIVIDE_BY_ZERO, .false.) ! Turn off stopping on div-by-zero only
258 : #endif
259 : #endif
260 : #endif /*E3SM*/
261 :
262 : ! Interface for Lapack tridiagonal matrix solver, sgtsv for single
263 : ! or dgtsv for double precision
264 752745117 : do i = 1, ngrdcol
265 4953652368 : call lapack_gtsv( ndim, nrhs, lhs(3,i,2:ndim), lhs(2,i,:), lhs(1,i,1:ndim-1), &
266 >79912*10^7 : rhs(i,:,:), ndim, info )
267 : end do
268 :
269 : #ifdef E3SM
270 : #ifndef NDEBUG
271 : #if defined(ARCH_MIC_KNL) && defined(CPRINTEL)
272 : ! Turn back on stopping on div-by-zero only
273 : call ieee_set_halting_mode(IEEE_DIVIDE_BY_ZERO, .true.)
274 : #endif
275 : #endif
276 : #endif /*E3SM*/
277 :
278 0 : select case( info )
279 : case( :-1 )
280 : write(fstderr,*) trim( solve_type )// &
281 0 : " illegal value in argument", -info
282 0 : err_code = clubb_fatal_error
283 :
284 0 : soln = -999._core_rknd
285 :
286 : case( 0 )
287 : ! Success
288 752745117 : do i = 1, ngrdcol
289 >21842*10^7 : if ( lapack_isnan( ndim, nrhs, rhs(i,:,:) ) ) then
290 0 : err_code = clubb_fatal_error
291 : end if
292 : end do
293 :
294 >22904*10^7 : soln = rhs
295 :
296 : case( 1: )
297 0 : write(fstderr,*) trim( solve_type )//" singular matrix."
298 0 : err_code = clubb_fatal_error
299 :
300 45080493 : soln = -999._core_rknd
301 :
302 : end select
303 :
304 45080493 : return
305 : end subroutine lapack_tridiag_solve
306 :
307 : !-----------------------------------------------------------------------
308 0 : subroutine lapack_band_solvex( solve_type, nsup, nsub, &
309 : ndim, nrhs, ngrdcol, &
310 0 : lhs, rhs, &
311 0 : soln, rcond )
312 :
313 : ! Description:
314 : ! Restructure and then solve a band diagonal system, with
315 : ! diagnostic output
316 :
317 : ! References:
318 : ! <http://www.netlib.org/lapack/single/sgbsvx.f>
319 : ! <http://www.netlib.org/lapack/double/dgbsvx.f>
320 :
321 : ! Notes:
322 : ! I found that due to the use of sgbcon/dgbcon it is much
323 : ! more expensive to use this on most systems than the simple
324 : ! driver. Use this version only if you don't case about compute time.
325 : ! Also note that this version equilibrates the lhs and does an iterative
326 : ! refinement of the solutions, which results in a slightly different answer
327 : ! than the simple driver does. -dschanen 24 Sep 2008
328 : !-----------------------------------------------------------------------
329 :
330 : use clubb_precision, only: &
331 : core_rknd ! Variable(s)
332 :
333 : use error_code, only: &
334 : clubb_at_least_debug_level, & ! Procedure
335 : err_code, & ! Error Indicator
336 : clubb_fatal_error ! Constants
337 :
338 : use lapack_interfaces, only: &
339 : lapack_gbsvx, & ! Procedures
340 : lapack_isnan
341 :
342 : implicit none
343 :
344 : ! ------------------------------ Input Variables ------------------------------
345 : character(len=*), intent(in) :: solve_type
346 :
347 : integer, intent(in) :: &
348 : nsup, & ! Number of superdiagonals
349 : nsub, & ! Number of subdiagonals
350 : ngrdcol, & ! Number of grid columns
351 : ndim, & ! The order of the LHS Matrix, i.e. the # of linear equations
352 : nrhs ! Number of RHS's to solve for
353 :
354 : ! ------------------------------ InOut Variables ------------------------------
355 : real( kind = core_rknd ), dimension(nsup+nsub+1,ngrdcol,ndim), intent(inout) :: &
356 : lhs ! Left hand side
357 :
358 : real( kind = core_rknd ), dimension(ngrdcol,ndim,nrhs), intent(inout) :: &
359 : rhs ! Right hand side(s)
360 :
361 : ! ------------------------------ Output Variables ------------------------------
362 : real( kind = core_rknd ), dimension(ngrdcol,ndim,nrhs), intent(out) :: &
363 : soln
364 :
365 : ! The estimate of the reciprocal condition number of matrix
366 : ! after equilibration (if done).
367 : real( kind = core_rknd ), dimension(ngrdcol), intent(out) :: &
368 : rcond
369 :
370 : ! ------------------------------ Local Variables ------------------------------
371 :
372 : ! Workspaces
373 0 : real( kind = core_rknd ), dimension(3*ndim) :: work
374 0 : integer, dimension(ndim) :: iwork
375 :
376 : real( kind = core_rknd ), dimension(2*nsub+nsup+1,ndim) :: &
377 0 : lulhs ! LU Decomposition of the LHS
378 :
379 : integer, dimension(ndim) :: &
380 0 : ipivot
381 :
382 : real( kind = core_rknd ), dimension(ngrdcol,nrhs) :: &
383 0 : ferr, berr ! Forward and backward error estimate
384 :
385 : real( kind = core_rknd ), dimension(ndim) :: &
386 0 : rscale, cscale ! Row and column scale factors for the LHS
387 :
388 : integer :: &
389 : info, & ! If this doesn't come back as 0, something went wrong
390 : offset, & ! Loop iterator
391 : imain, & ! Main diagonal of the matrix
392 : i,n ! Loop iterator
393 :
394 : character :: &
395 : equed ! Row equilibration status
396 :
397 :
398 : !-----------------------------------------------------------------------
399 : ! Reorder Matrix to use LAPACK band matrix format (5x6)
400 :
401 : ! Shift example:
402 :
403 : ! [ * * lhs(1,1) lhs(1,2) lhs(1,3) lhs(1,4) ] (2)=>
404 : ! [ * lhs(2,1) lhs(2,2) lhs(2,3) lhs(2,4) lhs(2,5) ] (1)=>
405 : ! [ lhs(3,1) lhs(3,2) lhs(3,3) lhs(3,4) lhs(3,5) lhs(3,6) ]
406 : ! <=(1) [ lhs(4,2) lhs(4,3) lhs(4,4) lhs(4,5) lhs(4,6) * ]
407 : ! <=(2) [ lhs(5,3) lhs(5,4) lhs(5,5) lhs(5,6) * * ]
408 :
409 : ! The '*' indicates unreferenced elements.
410 : ! For additional bands above and below the main diagonal, the
411 : ! shifts to the left or right increases by the distance from the
412 : ! main diagonal of the matrix.
413 : !-----------------------------------------------------------------------
414 :
415 0 : imain = nsup + 1
416 :
417 : ! For the offset, (+) is left, and (-) is right
418 :
419 : ! Sub diagonals
420 0 : do i = 1, ngrdcol
421 0 : do offset = 1, nsub, 1
422 0 : lhs(imain+offset,i,1:ndim) = eoshift( lhs(imain+offset,i,1:ndim), offset )
423 : end do
424 : end do
425 :
426 : ! Super diagonals
427 0 : do i = 1, ngrdcol
428 0 : do offset = 1, nsup, 1
429 0 : lhs(imain-offset,i,1:ndim) = eoshift( lhs(imain-offset,i,1:ndim), -offset )
430 : end do
431 : end do
432 :
433 : !-----------------------------------------------------------------------
434 : ! *** The LAPACK Routine ***
435 : ! SUBROUTINE SGBSVX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB,
436 : ! $ LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX,
437 : ! $ RCOND, FERR, BERR, WORK, IWORK, INFO )
438 : !-----------------------------------------------------------------------
439 :
440 : ! Lapack general band solver, expert version, sgbsvx for single
441 : ! or dgbsvx for double precision
442 0 : do i = 1, ngrdcol
443 : call lapack_gbsvx( 'Equilibrate lhs', 'No Transpose lhs', &
444 : ndim, nsub, nsup, nrhs, &
445 0 : lhs(:,i,:), nsup+nsub+1, lulhs, 2*nsub+nsup+1, &
446 : ipivot, equed, rscale, cscale, &
447 0 : rhs(i,:,:), ndim, soln(i,:,:), ndim, &
448 0 : rcond(i), ferr(i,:), berr(i,:), work, iwork, info )
449 : end do
450 :
451 :
452 : ! %% debug
453 : ! select case ( equed )
454 : ! case ('N')
455 : ! print *, "No equilib. was required for lhs."
456 : ! case ('R')
457 : ! print *, "Row equilib. was done on lhs."
458 : ! case ('C')
459 : ! print *, "Column equilib. was done on lhs."
460 : ! case ('B')
461 : ! print *, "Row and column equilib. was done on lhs."
462 : ! end select
463 :
464 : ! write(*,'(a,e12.5)') "Row scale : ", rscale
465 : ! write(*,'(a,e12.5)') "Column scale: ", cscale
466 : ! write(*,'(a,e12.5)') "Estimate of the reciprocal of the "//
467 : ! "condition number: ", rcond
468 : ! write(*,'(a,e12.5)') "Forward Error Estimate: ", ferr
469 : ! write(*,'(a,e12.5)') "Backward Error Estimate: ", berr
470 : ! %% end debug
471 :
472 : ! Diagnostic information
473 0 : if ( clubb_at_least_debug_level( 2 ) .and. any( ferr > 1.e-3_core_rknd ) ) then
474 :
475 0 : write(fstderr,*) "Warning, large error est. for: " // trim( solve_type )
476 :
477 0 : do n = 1, nrhs
478 0 : do i = 1, ngrdcol
479 0 : write(fstderr,*) "grdcol #", i, "rhs # ", n, "band_solvex forward error est. =", ferr(i,n)
480 0 : write(fstderr,*) "grdcol #", i, "rhs # ", n, "band_solvex backward error est. =", berr(i,n)
481 : end do
482 : end do
483 :
484 0 : write(fstderr,'(2(a20,e15.6))') "rcond est. = ", rcond, &
485 0 : "machine epsilon = ", epsilon( lhs(1,1,1) )
486 : end if
487 :
488 0 : select case( info )
489 :
490 : case( :-1 )
491 0 : write(fstderr,*) "in band_solvex for ", trim( solve_type ), &
492 0 : ": illegal value for argument", -info
493 0 : err_code = clubb_fatal_error
494 :
495 : case( 0 )
496 : ! Success!
497 0 : do i = 1, ngrdcol
498 0 : if ( lapack_isnan( ndim, nrhs, soln(i,:,:) ) ) then
499 0 : err_code = clubb_fatal_error
500 : end if
501 : end do
502 :
503 : case( 1: )
504 0 : if ( info == ndim+1 ) then
505 :
506 : write(fstderr,*) trim( solve_type )// &
507 0 : " Warning: matrix singular to working precision."
508 : write(fstderr,'(a,e12.5)') &
509 : "Estimate of the reciprocal of the"// &
510 0 : " condition number: ", rcond
511 : else
512 0 : write(fstderr,*) "in band_solvex for", trim( solve_type ), &
513 0 : ": singular matrix, solution not computed"
514 0 : err_code = clubb_fatal_error
515 : end if
516 :
517 : end select
518 :
519 0 : return
520 : end subroutine lapack_band_solvex
521 :
522 : !-----------------------------------------------------------------------
523 17870112 : subroutine lapack_band_solve( solve_type, nsup, nsub, &
524 : ndim, nrhs, ngrdcol, &
525 17870112 : lhs, rhs, &
526 17870112 : soln )
527 : ! Description:
528 : ! Restructure and then solve a band diagonal system
529 :
530 : ! References:
531 : ! <http://www.netlib.org/lapack/single/sgbsv.f>
532 : ! <http://www.netlib.org/lapack/double/dgbsv.f>
533 : !-----------------------------------------------------------------------
534 :
535 : use clubb_precision, only: &
536 : core_rknd ! Variable(s)
537 :
538 : use error_code, only: &
539 : clubb_at_least_debug_level, &
540 : err_code, & ! Error Indicator
541 : clubb_fatal_error ! Constants
542 :
543 : use lapack_interfaces, only: &
544 : lapack_gbsv, & ! Procedures
545 : lapack_isnan
546 :
547 : implicit none
548 :
549 : ! ------------------------------ Input Variables ------------------------------
550 : character(len=*), intent(in) :: solve_type
551 :
552 : integer, intent(in) :: &
553 : nsup, & ! Number of superdiagonals
554 : nsub, & ! Number of subdiagonals
555 : ngrdcol, & ! Number of grid columns
556 : ndim, & ! The order of the LHS Matrix, i.e. the # of linear equations
557 : nrhs ! Number of RHS's to solve for
558 :
559 : ! Note: matrix lhs is intent(in), not intent(inout)
560 : ! as in the subroutine band_solvex( )
561 : real( kind = core_rknd ), dimension(nsup+nsub+1,ngrdcol,ndim), intent(in) :: &
562 : lhs ! Left hand side
563 :
564 : real( kind = core_rknd ), dimension(ngrdcol,ndim,nrhs), intent(inout) :: &
565 : rhs ! Right hand side(s)
566 :
567 : ! ------------------------------ Output Variables ------------------------------
568 : real( kind = core_rknd ), dimension(ngrdcol,ndim,nrhs), intent(out) :: &
569 : soln
570 :
571 : ! ------------------------------ Local Variables ------------------------------
572 :
573 : ! Workspaces
574 : real( kind = core_rknd ), dimension(2*nsub+nsup+1,ndim,ngrdcol) :: &
575 35740224 : lulhs ! LU Decomposition of the LHS
576 :
577 : integer, dimension(ndim) :: &
578 35740224 : ipivot
579 :
580 : integer :: &
581 : info, & ! If this doesn't come back as 0, something went wrong
582 : imain ! Main diagonal of the matrix
583 :
584 : integer :: i, j, d
585 :
586 : !-----------------------------------------------------------------------
587 : ! Reorder LU Matrix to use LAPACK band matrix format
588 : !
589 : ! Shift example for lulhs matrix given a 5x5 lhs matrix
590 : !
591 : !
592 : ! lulhs =
593 : ! Columns
594 : ! 1 2 3 4 5 6 7
595 : ! Rows
596 : ! 1 [ 0 0 0 0 lhs(3,1) lhs(4,2) lhs(5,3) ]
597 : ! 2 [ 0 0 0 lhs(2,1) lhs(3,2) lhs(4,3) lhs(5,4) ]
598 : ! 3 [ 0 0 lhs(1,1) lhs(2,2) lhs(3,3) lhs(4,4) lhs(5,5) ]
599 : ! 4 [ 0 0 lhs(1,2) lhs(2,3) lhs(3,4) lhs(4,5) 0 ]
600 : ! 5 [ 0 0 lhs(1,3) lhs(2,4) lhs(3,5) 0 0 ]
601 : !
602 : ! all lhs lhs lhs lhs lhs
603 : ! set to shifted shifted no shifted shifted
604 : ! 0 down 2 down 1 shift up 1 up 2
605 : !
606 : ! The first nsup columns of lulhs are always set to 0;
607 : ! the rest of the columns are set to shifted
608 : ! columns of lhs. This can be thought of as taking lhs, never touching the middle column, but
609 : ! shifting the columns that are n columns to the left of the middle down by n rows, and then
610 : ! shifting the columns that are n columns to the right of the middle up by n rows, finally
611 : ! adding nsup columns of zeros onto the left of the array. This results in lulhs.
612 : !-----------------------------------------------------------------------
613 :
614 : ! Reorder lulhs, omitting the additional 2*nsub bands
615 : ! that are used for the LU decomposition of the matrix.
616 :
617 17870112 : imain = nsub + nsup + 1
618 :
619 :
620 : ! The first nsup rows of lulhs will contain 0s that are end-shifted lhs values. This needs
621 : ! to be handled differently so the algorithm to access lhs will not try to use out of bound
622 : ! values.
623 : ! ... nsup nsup+1 ... imain ...
624 : ! \ / \ /
625 : ! always | begins with nsup | all lhs values
626 : ! 0 0s, and decreases
627 : ! by one 0 each row
628 : !
629 : ! ... nsup nsup+1 ... imain ...
630 : ! lulhs(:,1) = 0 0 0 0 lhs lhs
631 : ! lulhs(:,2) = 0 0 0 lhs lhs lhs
632 : !
633 : ! Since the first nsup rows are the first rows in lulhs, we're going to access them first to
634 : ! avoid out of order memory accesses.
635 298389312 : do i = 1, ngrdcol
636 859427712 : do d = 1, nsup
637 :
638 : ! Add 0s to first nsup columns, and decreasing number of end-shift affected columns
639 2524672800 : do j = 1, imain-d
640 2524672800 : lulhs(j,d,i) = 0.0_core_rknd
641 : end do
642 :
643 : ! Copy lhs values into appropriate lulhs spots
644 2805192000 : do j = imain-d+1, imain+nsub
645 2524672800 : lulhs(j,d,i) = lhs(j-nsub,i,d+j-imain)
646 : end do
647 :
648 : end do
649 : end do
650 :
651 : ! After the first nsup rows are dealt with, the offset lhs values can be copied into lulhs
652 : ! until the last nsup rows are reached. This is because the last nsup rows also contain
653 : ! end-shifted values, set to 0 in the next loop.
654 : !
655 : ! ... nsup nsup+1 ...
656 : ! \ /
657 : ! always | all lhs values
658 : !
659 : ! ... nsup nsup+1 ...
660 : ! lulhs(:,nsup+1) = 0 0 lhs lhs
661 : ! lulhs(:,ndim-nsub) = 0 0 lhs lhs
662 : !
663 : ! For all values not affected by end-shifting
664 298389312 : do i = 1, ngrdcol
665 46864576512 : do d = nsup+1, ndim-nsub
666 :
667 : ! Set first nsup columns to 0
668 >13969*10^7 : do j = 1, nsub
669 >13969*10^7 : lulhs(j,d,i) = 0.0_core_rknd
670 : end do
671 :
672 : ! Copy lhs values into appropriate lulhs spots
673 >27967*10^7 : do j = imain-nsub, imain+nsub
674 >27939*10^7 : lulhs(j,d,i) = lhs(j-nsub,i, d+j-imain)
675 : end do
676 :
677 : end do
678 : end do
679 :
680 :
681 : ! The last nsup rows of lulhs will contain 0s that are end-shifted lhs values. This needs
682 : ! to be handled differently so the algorithm to access lhs will not try to use out of bound
683 : ! values.
684 : !
685 : !
686 : ! ... nsup nsup+1 ... imain+1 ...
687 : ! lulhs(:,ndim-nsub+1) = 0 0 lhs lhs lhs 0
688 : ! lulhs(:,ndim) = 0 0 lhs lhs 0 0
689 : !
690 : ! | | starts with one 0, then
691 : ! always | all lhs values | then increases to nsup 0s
692 : ! | | towards ndim
693 : ! / \ / \
694 : ! ... nsup nsup+1 ... ndim-nsub+1 ... ndim
695 : !
696 : ! Finish the lulhs setup by accessing the last values last, keeping memory access ordered
697 298389312 : do i = 1, ngrdcol
698 859427712 : do d = ndim-nsub+1, ndim
699 :
700 : ! Set first nsup columns to 0
701 1683115200 : do j = 1, nsub
702 1683115200 : lulhs(j,d,i) = 0.0_core_rknd
703 : end do
704 :
705 : ! Copy lhs values into appropriate lulhs spots
706 2524672800 : do j = imain-nsup, imain-(d-ndim)
707 2524672800 : lulhs(j,d,i) = lhs(j-nsub,i, d+j-imain)
708 : end do
709 :
710 : ! Set increasing number of end-shift affected columns to 0
711 1683115200 : do j = imain-(d-ndim)+1, imain+nsub
712 1402596000 : lulhs(j,d,i) = 0.0_core_rknd
713 : end do
714 :
715 : end do
716 : end do
717 :
718 : !-----------------------------------------------------------------------
719 : ! *** LAPACK routine ***
720 : ! SUBROUTINE DGBSV( N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, INFO )
721 : !-----------------------------------------------------------------------
722 :
723 : ! Lapack general band solver, sgbsv for single
724 : ! or dgbsv for double precision
725 298389312 : do i = 1, ngrdcol
726 : call lapack_gbsv( ndim, nsub, nsup, nrhs, lulhs(:,:,i), nsub*2+nsup+1, &
727 >24014*10^7 : ipivot, rhs(i,:,:), ndim, info )
728 : end do
729 :
730 :
731 0 : select case( info )
732 :
733 : case( :-1 )
734 0 : write(fstderr,*) "in band_solve for ", trim( solve_type ), &
735 0 : ": illegal value for argument", -info
736 0 : err_code = clubb_fatal_error
737 : case( 0 )
738 : ! Success!
739 17870112 : if ( clubb_at_least_debug_level( 1 ) ) then
740 0 : do i = 1, ngrdcol
741 0 : if ( lapack_isnan( ndim, nrhs, rhs(i,:,:) ) ) then
742 0 : err_code = clubb_fatal_error
743 : end if
744 : end do
745 : end if
746 :
747 >12687*10^7 : soln = rhs
748 :
749 : case( 1: )
750 0 : write(fstderr,*) "in band_solve for ", trim( solve_type ), &
751 0 : ": singular matrix, solution not computed"
752 17870112 : err_code = clubb_fatal_error
753 : end select
754 :
755 17870112 : return
756 : end subroutine lapack_band_solve
757 :
758 : end module lapack_wrap
|