LCOV - code coverage report
Current view: top level - physics/clubb/src/CLUBB_core - lapack_wrap.F90 (source / functions) Hit Total Coverage
Test: coverage.info Lines: 43 136 31.6 %
Date: 2024-12-17 17:57:11 Functions: 2 4 50.0 %

          Line data    Source code
       1             : !-----------------------------------------------------------------------
       2             : ! $Id$
       3             : !===============================================================================
       4             : module lapack_wrap
       5             : 
       6             : ! Description:
       7             : !   Wrappers for the band diagonal and tridiagonal direct matrix
       8             : !   solvers contained in the LAPACK library.
       9             : 
      10             : ! References:
      11             : !   LAPACK--Linear Algebra PACKage
      12             : !   URL: <http://www.netlib.org/lapack/>
      13             : !-----------------------------------------------------------------------
      14             :   use constants_clubb, only:  & 
      15             :     fstderr ! Variable(s)
      16             : 
      17             :   use clubb_precision, only: &
      18             :     core_rknd ! Variable
      19             : 
      20             :   implicit none
      21             : 
      22             :   ! Simple routines
      23             :   public :: lapack_tridiag_solve, &
      24             :             lapack_band_solve
      25             : 
      26             :   ! Expert routines
      27             :   public :: lapack_tridiag_solvex, &
      28             :             lapack_band_solvex
      29             : 
      30             :   private ! Set Default Scope
      31             : 
      32             :   contains
      33             : 
      34             : !-----------------------------------------------------------------------
      35           0 :   subroutine lapack_tridiag_solvex( solve_type, ndim, nrhs, ngrdcol, &
      36           0 :                                     lhs, rhs, &
      37           0 :                                     soln, rcond )
      38             : 
      39             : ! Description:
      40             : !   Solves a tridiagonal system of equations (expert routine).
      41             : 
      42             : ! References:
      43             : !   <http://www.netlib.org/lapack/single/sgtsvx.f>
      44             : !   <http://www.netlib.org/lapack/double/dgtsvx.f>
      45             : 
      46             : ! Notes:
      47             : !   More expensive than the simple routine, but tridiagonal
      48             : !   decomposition is still relatively cheap.
      49             : !-----------------------------------------------------------------------
      50             : 
      51             :     use clubb_precision, only: &
      52             :         core_rknd ! Variable(s)
      53             : 
      54             :     use error_code, only: &
      55             :         clubb_at_least_debug_level,  & ! Procedure  
      56             :         err_code,                    & ! Error Indicator
      57             :         clubb_fatal_error              ! Constants
      58             : 
      59             :     use lapack_interfaces, only: &
      60             :         lapack_gtsvx, &      ! Procedure
      61             :         lapack_isnan
      62             : 
      63             :     implicit none
      64             : 
      65             :     intrinsic :: kind
      66             : 
      67             :     ! ----------------------- Input variables -----------------------
      68             :     character(len=*), intent(in) ::  & 
      69             :       solve_type ! Used to write a message if this fails
      70             : 
      71             :     integer, intent(in) ::  & 
      72             :       ndim,     & ! N-dimension of matrix
      73             :       ngrdcol,  & ! Number of grid columns
      74             :       nrhs        ! # of right hand sides to back subst. after LU-decomp.
      75             : 
      76             :     ! ----------------------- Input/Output variables -----------------------
      77             :     real( kind = core_rknd ), intent(inout), dimension(3,ngrdcol,ndim) ::  & 
      78             :       lhs ! Tridiagonal LHS
      79             : 
      80             :     real( kind = core_rknd ), intent(inout), dimension(ngrdcol,ndim,nrhs) ::  & 
      81             :       rhs ! RHS input
      82             : 
      83             :     ! The estimate of the reciprocal of the condition number on the LHS matrix.
      84             :     ! If rcond is < machine precision the matrix is singular to working
      85             :     ! precision, and info == ndim+1.  If rcond == 0, then the LHS matrix
      86             :     ! is singular.  This condition is indicated by a return code of info > 0.
      87             :     real( kind = core_rknd ), dimension(ngrdcol), intent(out) :: rcond
      88             : 
      89             :     ! ----------------------- Output variables -----------------------
      90             :     real( kind = core_rknd ), intent(out), dimension(ngrdcol,ndim,nrhs) ::  & 
      91             :       soln ! solution
      92             : 
      93             :     ! ----------------------- Local Variables -----------------------
      94             :     ! These contain the decomposition of the matrix
      95           0 :     real( kind = core_rknd ), dimension(ndim-1) :: dlf, duf
      96           0 :     real( kind = core_rknd ), dimension(ndim)   :: df
      97           0 :     real( kind = core_rknd ), dimension(ndim-2) :: du2
      98             : 
      99             :     integer, dimension(ndim) ::  & 
     100           0 :       ipivot  ! Index of pivots done during decomposition
     101             : 
     102             :     integer, dimension(ndim) ::  & 
     103           0 :       iwork   ! `scrap' array
     104             : 
     105             : 
     106             :     real( kind = core_rknd ), dimension(ngrdcol,nrhs) ::  & 
     107           0 :       ferr,  & ! Forward error estimate
     108           0 :       berr     ! Backward error estimate
     109             : 
     110             :     real( kind = core_rknd ), dimension(3*ndim) ::  & 
     111           0 :       work  ! `Scrap' array
     112             : 
     113             :     integer :: info ! Diagnostic output
     114             : 
     115             :     integer :: i, n  ! Array index
     116             : 
     117             : !-----------------------------------------------------------------------
     118             : !     *** The LAPACK Routine ***
     119             : !     SUBROUTINE SGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF,
     120             : !    $                   DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR,
     121             : !    $                   WORK, IWORK, INFO )
     122             : !-----------------------------------------------------------------------
     123             : 
     124             :     ! Lapack tridiagonal matrix solver, expert version, sgtsvx for single 
     125             :     ! or dgtsvx for double precision
     126           0 :     do i = 1, ngrdcol
     127             :       call lapack_gtsvx( "Not Factored", "No Transpose lhs", ndim, nrhs,  & 
     128           0 :                          lhs(3,i,2:ndim), lhs(2,i,:), lhs(1,i,1:ndim-1),  & 
     129             :                          dlf, df, duf, du2, ipivot,  & 
     130           0 :                          rhs(i,:,:), ndim, soln(i,:,:), ndim, rcond(i), & 
     131           0 :                          ferr(i,:), berr(i,:), work, iwork, info )
     132             :     end do
     133             : 
     134             :     ! Print diagnostics for when ferr is large
     135           0 :     if ( clubb_at_least_debug_level( 2 ) .and. any( ferr > 1.e-3_core_rknd ) ) then
     136             : 
     137           0 :       write(fstderr,*) "Warning, large error est. for: " // trim( solve_type )
     138             : 
     139           0 :       do n = 1, nrhs
     140           0 :         do i = 1, ngrdcol
     141           0 :           write(fstderr,*) "grdcol #", i, "rhs # ", i, "tridiag forward error est. =", ferr(i,n)
     142           0 :           write(fstderr,*) "grdcol #", i, "rhs # ", i, "tridiag backward error est. =", berr(i,n)
     143             :         end do
     144             :       end do
     145             : 
     146           0 :       write(fstderr,'(2(a20,e15.6))') "rcond est. = ", rcond, & 
     147           0 :         "machine epsilon = ", epsilon( lhs(1,1,1) )
     148             :     end if
     149             : 
     150           0 :     select case( info )
     151             :     case( :-1 )
     152             :       write(fstderr,*) trim( solve_type )// & 
     153           0 :         "illegal value in argument", -info
     154           0 :       err_code = clubb_fatal_error
     155             : 
     156             :     case( 0 )
     157             :       ! Success
     158           0 :       do i = 1, ngrdcol
     159           0 :         if ( lapack_isnan( ndim, nrhs, soln(i,:,:) ) ) then
     160           0 :           err_code = clubb_fatal_error 
     161             :         end if
     162             :       end do
     163             : 
     164             :     case( 1: )
     165           0 :       if ( info == ndim+1 ) then
     166             :         write(fstderr,*) trim( solve_type) // & 
     167           0 :           " Warning: matrix is singular to working precision."
     168             :         write(fstderr,'(a,e12.5)')  & 
     169           0 :           "Estimate of the reciprocal of the condition number: ", rcond
     170             :       else
     171             :         write(fstderr,*) solve_type// & 
     172           0 :           " singular matrix."
     173           0 :         err_code = clubb_fatal_error
     174             :       end if
     175             : 
     176             :     end select
     177             : 
     178           0 :     return
     179             :   end subroutine lapack_tridiag_solvex
     180             : 
     181             : !-----------------------------------------------------------------------
     182    45080493 :   subroutine lapack_tridiag_solve( solve_type, ndim, nrhs, ngrdcol, &
     183    45080493 :                                    lhs, rhs, &
     184    45080493 :                                    soln )
     185             : 
     186             : ! Description:
     187             : !   Solves a tridiagonal system of equations (simple routine)
     188             : 
     189             : ! References:
     190             : !   <http://www.netlib.org/lapack/single/sgtsv.f>
     191             : !   <http://www.netlib.org/lapack/double/dgtsv.f>
     192             : !-----------------------------------------------------------------------
     193             : 
     194             :     use clubb_precision, only: &
     195             :         core_rknd ! Variable(s)
     196             : 
     197             : #ifdef E3SM
     198             : #ifndef NDEBUG
     199             : #if defined(ARCH_MIC_KNL) && defined(CPRINTEL)
     200             :     use, intrinsic :: ieee_exceptions
     201             : #endif
     202             : #endif
     203             : #endif /*E3SM*/
     204             : 
     205             :     use error_code, only: &
     206             :         err_code,                    & ! Error Indicator
     207             :         clubb_fatal_error              ! Constants
     208             :       
     209             :     use lapack_interfaces, only: &
     210             :         lapack_gtsv, &       ! Procedure
     211             :         lapack_isnan
     212             : 
     213             :     implicit none
     214             : 
     215             :     intrinsic :: kind
     216             : 
     217             :     ! ----------------------- Input variables -----------------------
     218             :     character(len=*), intent(in) ::  & 
     219             :       solve_type ! Used to write a message if this fails
     220             : 
     221             :     integer, intent(in) ::  & 
     222             :       ndim,     & ! N-dimension of matrix
     223             :       ngrdcol,  & ! Number of grid columns
     224             :       nrhs        ! # of right hand sides to back subst. after LU-decomp.
     225             : 
     226             :     ! ----------------------- Input/Output Variables -----------------------
     227             :     real( kind = core_rknd ), intent(inout), dimension(3,ngrdcol,ndim) ::  & 
     228             :       lhs ! Tridiagonal LHS input
     229             : 
     230             :     real( kind = core_rknd ), intent(inout), dimension(ngrdcol,ndim,nrhs) ::  & 
     231             :       rhs ! RHS input
     232             : 
     233             :     ! ----------------------- Output variables -----------------------
     234             :     real( kind = core_rknd ), intent(out), dimension(ngrdcol,ndim,nrhs) ::  & 
     235             :       soln ! solution
     236             : 
     237             :     ! ----------------------- Local Variables -----------------------
     238             :     real( kind = core_rknd ), dimension(ndim) ::  & 
     239             :       subd, diag, supd
     240             : 
     241             :     integer :: &
     242             :       info, & ! Diagnostic output
     243             :       i       ! Loop var
     244             : 
     245             : !-----------------------------------------------------------------------
     246             : !       *** The LAPACK Routine ***
     247             : !       SUBROUTINE DGTSV( N, NRHS, DL, D, DU, B, LDB, INFO )
     248             : !-----------------------------------------------------------------------
     249             : 
     250             : #ifdef E3SM
     251             : #ifndef NDEBUG
     252             : #if defined(ARCH_MIC_KNL) && defined(CPRINTEL)
     253             :     ! when floating-point exceptions are turned on, this call was failing with
     254             :     ! a div-by-zero on KNL with Intel/MKL. Solution was to turn off exceptions
     255             :     ! only here at this call (and only for machine with ARCH_MIC_KNL defined)
     256             :     ! (github 1183)
     257             :     call ieee_set_halting_mode(IEEE_DIVIDE_BY_ZERO, .false.) ! Turn off stopping on div-by-zero only
     258             : #endif
     259             : #endif
     260             : #endif /*E3SM*/
     261             : 
     262             :     ! Interface for Lapack tridiagonal matrix solver, sgtsv for single 
     263             :     ! or dgtsv for double precision
     264   752745117 :     do i = 1, ngrdcol
     265  4953652368 :       call lapack_gtsv( ndim, nrhs, lhs(3,i,2:ndim), lhs(2,i,:), lhs(1,i,1:ndim-1),  & 
     266 >79912*10^7 :                         rhs(i,:,:), ndim, info )
     267             :     end do
     268             : 
     269             : #ifdef E3SM
     270             : #ifndef NDEBUG
     271             : #if defined(ARCH_MIC_KNL) && defined(CPRINTEL)
     272             :     ! Turn back on stopping on div-by-zero only 
     273             :     call ieee_set_halting_mode(IEEE_DIVIDE_BY_ZERO, .true.)
     274             : #endif
     275             : #endif
     276             : #endif /*E3SM*/
     277             : 
     278           0 :     select case( info )
     279             :     case( :-1 )
     280             :       write(fstderr,*) trim( solve_type )// & 
     281           0 :         " illegal value in argument", -info
     282           0 :       err_code = clubb_fatal_error
     283             : 
     284           0 :       soln = -999._core_rknd
     285             : 
     286             :     case( 0 )
     287             :       ! Success
     288   752745117 :       do i = 1, ngrdcol
     289 >21842*10^7 :         if ( lapack_isnan( ndim, nrhs, rhs(i,:,:) ) ) then
     290           0 :           err_code = clubb_fatal_error 
     291             :         end if
     292             :       end do
     293             : 
     294 >22904*10^7 :       soln = rhs
     295             : 
     296             :     case( 1: )
     297           0 :       write(fstderr,*) trim( solve_type )//" singular matrix."
     298           0 :       err_code = clubb_fatal_error
     299             : 
     300    45080493 :       soln = -999._core_rknd
     301             : 
     302             :     end select
     303             : 
     304    45080493 :     return
     305             :   end subroutine lapack_tridiag_solve
     306             : 
     307             : !-----------------------------------------------------------------------
     308           0 :   subroutine lapack_band_solvex( solve_type, nsup, nsub, &
     309             :                                  ndim, nrhs, ngrdcol, &
     310           0 :                                  lhs, rhs, &
     311           0 :                                  soln, rcond )
     312             : 
     313             : ! Description:
     314             : !   Restructure and then solve a band diagonal system, with
     315             : !   diagnostic output
     316             : 
     317             : ! References:
     318             : !   <http://www.netlib.org/lapack/single/sgbsvx.f>
     319             : !   <http://www.netlib.org/lapack/double/dgbsvx.f>
     320             : 
     321             : ! Notes:
     322             : !   I found that due to the use of sgbcon/dgbcon it is much
     323             : !   more expensive to use this on most systems than the simple
     324             : !   driver. Use this version only if you don't case about compute time.
     325             : !   Also note that this version equilibrates the lhs and does an iterative
     326             : !   refinement of the solutions, which results in a slightly different answer
     327             : !   than the simple driver does. -dschanen 24 Sep 2008
     328             : !-----------------------------------------------------------------------
     329             : 
     330             :     use clubb_precision, only: &
     331             :         core_rknd ! Variable(s)
     332             : 
     333             :     use error_code, only: &
     334             :         clubb_at_least_debug_level,  & ! Procedure  
     335             :         err_code,                    & ! Error Indicator
     336             :         clubb_fatal_error              ! Constants
     337             : 
     338             :     use lapack_interfaces, only: &
     339             :         lapack_gbsvx, &      ! Procedures
     340             :         lapack_isnan
     341             : 
     342             :     implicit none
     343             : 
     344             :     ! ------------------------------ Input Variables ------------------------------
     345             :     character(len=*), intent(in) :: solve_type
     346             : 
     347             :     integer, intent(in) :: & 
     348             :       nsup,    & ! Number of superdiagonals
     349             :       nsub,    & ! Number of subdiagonals
     350             :       ngrdcol, & ! Number of grid columns
     351             :       ndim,    & ! The order of the LHS Matrix, i.e. the # of linear equations
     352             :       nrhs       ! Number of RHS's to solve for
     353             : 
     354             :     ! ------------------------------ InOut Variables ------------------------------
     355             :     real( kind = core_rknd ), dimension(nsup+nsub+1,ngrdcol,ndim), intent(inout) ::  & 
     356             :       lhs ! Left hand side
     357             : 
     358             :     real( kind = core_rknd ), dimension(ngrdcol,ndim,nrhs), intent(inout) ::  & 
     359             :       rhs ! Right hand side(s)
     360             : 
     361             :     ! ------------------------------ Output Variables ------------------------------
     362             :     real( kind = core_rknd ), dimension(ngrdcol,ndim,nrhs), intent(out) :: &
     363             :       soln
     364             : 
     365             :     ! The estimate of the reciprocal condition number of matrix
     366             :     ! after equilibration (if done).
     367             :     real( kind = core_rknd ), dimension(ngrdcol), intent(out) ::  & 
     368             :       rcond
     369             : 
     370             :     ! ------------------------------ Local Variables ------------------------------
     371             : 
     372             :     ! Workspaces
     373           0 :     real( kind = core_rknd ), dimension(3*ndim)  :: work
     374           0 :     integer, dimension(ndim) :: iwork
     375             : 
     376             :     real( kind = core_rknd ), dimension(2*nsub+nsup+1,ndim) :: & 
     377           0 :       lulhs ! LU Decomposition of the LHS
     378             : 
     379             :     integer, dimension(ndim) ::  & 
     380           0 :       ipivot
     381             : 
     382             :     real( kind = core_rknd ), dimension(ngrdcol,nrhs) ::  & 
     383           0 :       ferr, berr ! Forward and backward error estimate
     384             : 
     385             :     real( kind = core_rknd ), dimension(ndim) ::  & 
     386           0 :       rscale, cscale ! Row and column scale factors for the LHS
     387             : 
     388             :     integer ::  & 
     389             :       info,   & ! If this doesn't come back as 0, something went wrong
     390             :       offset, & ! Loop iterator
     391             :       imain,  & ! Main diagonal of the matrix
     392             :       i,n       ! Loop iterator
     393             : 
     394             :     character ::  & 
     395             :       equed ! Row equilibration status
     396             : 
     397             : 
     398             : !-----------------------------------------------------------------------
     399             : !       Reorder Matrix to use LAPACK band matrix format (5x6)
     400             : 
     401             : !       Shift example:
     402             : 
     403             : !       [    *        *     lhs(1,1) lhs(1,2) lhs(1,3) lhs(1,4) ] (2)=>
     404             : !       [    *     lhs(2,1) lhs(2,2) lhs(2,3) lhs(2,4) lhs(2,5) ] (1)=>
     405             : !       [ lhs(3,1) lhs(3,2) lhs(3,3) lhs(3,4) lhs(3,5) lhs(3,6) ]
     406             : ! <=(1) [ lhs(4,2) lhs(4,3) lhs(4,4) lhs(4,5) lhs(4,6)    *     ]
     407             : ! <=(2) [ lhs(5,3) lhs(5,4) lhs(5,5) lhs(5,6)    *        *     ]
     408             : 
     409             : !       The '*' indicates unreferenced elements.
     410             : !       For additional bands above and below the main diagonal, the
     411             : !       shifts to the left or right increases by the distance from the
     412             : !       main diagonal of the matrix.
     413             : !-----------------------------------------------------------------------
     414             : 
     415           0 :     imain = nsup + 1
     416             : 
     417             :     ! For the offset, (+) is left, and (-) is right
     418             : 
     419             :     ! Sub diagonals
     420           0 :     do i = 1, ngrdcol
     421           0 :       do offset = 1, nsub, 1
     422           0 :         lhs(imain+offset,i,1:ndim) = eoshift( lhs(imain+offset,i,1:ndim), offset )
     423             :       end do
     424             :     end do
     425             : 
     426             :     ! Super diagonals
     427           0 :     do i = 1, ngrdcol
     428           0 :       do offset = 1, nsup, 1
     429           0 :         lhs(imain-offset,i,1:ndim) = eoshift( lhs(imain-offset,i,1:ndim), -offset )
     430             :       end do
     431             :     end do
     432             : 
     433             : !-----------------------------------------------------------------------
     434             : !     *** The LAPACK Routine ***
     435             : !     SUBROUTINE SGBSVX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB,
     436             : !    $                   LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX,
     437             : !    $                   RCOND, FERR, BERR, WORK, IWORK, INFO )
     438             : !-----------------------------------------------------------------------
     439             : 
     440             :     ! Lapack general band solver, expert version, sgbsvx for single 
     441             :     ! or dgbsvx for double precision
     442           0 :     do i = 1, ngrdcol
     443             :       call lapack_gbsvx( 'Equilibrate lhs', 'No Transpose lhs', & 
     444             :                          ndim, nsub, nsup, nrhs, & 
     445           0 :                          lhs(:,i,:), nsup+nsub+1, lulhs, 2*nsub+nsup+1, & 
     446             :                          ipivot, equed, rscale, cscale, & 
     447           0 :                          rhs(i,:,:), ndim, soln(i,:,:), ndim, & 
     448           0 :                          rcond(i), ferr(i,:), berr(i,:), work, iwork, info )
     449             :     end do
     450             : 
     451             : 
     452             : ! %% debug
     453             : !       select case ( equed )
     454             : !       case ('N')
     455             : !         print *, "No equilib. was required for lhs."
     456             : !       case ('R')
     457             : !         print *, "Row equilib. was done on lhs."
     458             : !       case ('C')
     459             : !         print *, "Column equilib. was done on lhs."
     460             : !       case ('B')
     461             : !         print *, "Row and column equilib. was done on lhs."
     462             : !       end select
     463             : 
     464             : !       write(*,'(a,e12.5)') "Row scale : ", rscale
     465             : !       write(*,'(a,e12.5)') "Column scale: ", cscale
     466             : !       write(*,'(a,e12.5)') "Estimate of the reciprocal of the "//
     467             : !                            "condition number: ", rcond
     468             : !       write(*,'(a,e12.5)') "Forward Error Estimate: ", ferr
     469             : !       write(*,'(a,e12.5)') "Backward Error Estimate: ", berr
     470             : ! %% end debug
     471             : 
     472             :     ! Diagnostic information
     473           0 :     if ( clubb_at_least_debug_level( 2 ) .and. any( ferr > 1.e-3_core_rknd ) ) then
     474             : 
     475           0 :       write(fstderr,*) "Warning, large error est. for: " // trim( solve_type )
     476             : 
     477           0 :       do n = 1, nrhs
     478           0 :         do i = 1, ngrdcol
     479           0 :           write(fstderr,*) "grdcol #", i, "rhs # ", n, "band_solvex forward error est. =", ferr(i,n)
     480           0 :           write(fstderr,*) "grdcol #", i, "rhs # ", n, "band_solvex backward error est. =", berr(i,n)
     481             :         end do
     482             :       end do
     483             : 
     484           0 :       write(fstderr,'(2(a20,e15.6))') "rcond est. = ", rcond, & 
     485           0 :         "machine epsilon = ", epsilon( lhs(1,1,1) )
     486             :     end if
     487             : 
     488           0 :     select case( info )
     489             : 
     490             :     case( :-1 )
     491           0 :         write(fstderr,*) "in band_solvex for ", trim( solve_type ), &
     492           0 :             ": illegal value for argument", -info
     493           0 :         err_code = clubb_fatal_error
     494             : 
     495             :     case( 0 )
     496             :       ! Success!
     497           0 :       do i = 1, ngrdcol
     498           0 :         if ( lapack_isnan( ndim, nrhs, soln(i,:,:) ) ) then
     499           0 :           err_code = clubb_fatal_error 
     500             :         end if
     501             :       end do
     502             : 
     503             :     case( 1: )
     504           0 :       if ( info == ndim+1 ) then
     505             : 
     506             :         write(fstderr,*) trim( solve_type )// & 
     507           0 :           " Warning: matrix singular to working precision."
     508             :         write(fstderr,'(a,e12.5)')  & 
     509             :           "Estimate of the reciprocal of the"// & 
     510           0 :           " condition number: ", rcond
     511             :       else
     512           0 :         write(fstderr,*) "in band_solvex for", trim( solve_type ), &
     513           0 :           ": singular matrix, solution not computed"    
     514           0 :         err_code = clubb_fatal_error
     515             :       end if
     516             : 
     517             :     end select
     518             : 
     519           0 :     return
     520             :   end subroutine lapack_band_solvex
     521             : 
     522             : !-----------------------------------------------------------------------
     523    17870112 :   subroutine lapack_band_solve( solve_type, nsup, nsub, &
     524             :                                 ndim, nrhs, ngrdcol, &
     525    17870112 :                                 lhs, rhs, &
     526    17870112 :                                 soln )
     527             : ! Description:
     528             : !   Restructure and then solve a band diagonal system
     529             : 
     530             : ! References:
     531             : !   <http://www.netlib.org/lapack/single/sgbsv.f>
     532             : !   <http://www.netlib.org/lapack/double/dgbsv.f>
     533             : !-----------------------------------------------------------------------
     534             : 
     535             :     use clubb_precision, only: &
     536             :         core_rknd ! Variable(s)
     537             : 
     538             :     use error_code, only: &
     539             :         clubb_at_least_debug_level, &
     540             :         err_code,                    & ! Error Indicator
     541             :         clubb_fatal_error              ! Constants
     542             : 
     543             :     use lapack_interfaces, only: &
     544             :         lapack_gbsv, &       ! Procedures
     545             :         lapack_isnan
     546             : 
     547             :     implicit none
     548             : 
     549             :     ! ------------------------------ Input Variables ------------------------------
     550             :     character(len=*), intent(in) :: solve_type
     551             : 
     552             :     integer, intent(in) :: & 
     553             :       nsup,    & ! Number of superdiagonals
     554             :       nsub,    & ! Number of subdiagonals
     555             :       ngrdcol, & ! Number of grid columns
     556             :       ndim,    & ! The order of the LHS Matrix, i.e. the # of linear equations
     557             :       nrhs       ! Number of RHS's to solve for
     558             : 
     559             :     ! Note: matrix lhs is intent(in), not intent(inout)
     560             :     ! as in the subroutine band_solvex( )
     561             :     real( kind = core_rknd ), dimension(nsup+nsub+1,ngrdcol,ndim), intent(in) ::  & 
     562             :       lhs ! Left hand side
     563             : 
     564             :     real( kind = core_rknd ), dimension(ngrdcol,ndim,nrhs), intent(inout) ::  & 
     565             :       rhs ! Right hand side(s)
     566             : 
     567             :     ! ------------------------------ Output Variables ------------------------------
     568             :     real( kind = core_rknd ), dimension(ngrdcol,ndim,nrhs), intent(out) :: &
     569             :       soln
     570             : 
     571             :     ! ------------------------------ Local Variables ------------------------------
     572             : 
     573             :     ! Workspaces
     574             :     real( kind = core_rknd ), dimension(2*nsub+nsup+1,ndim,ngrdcol) :: & 
     575    35740224 :       lulhs ! LU Decomposition of the LHS
     576             : 
     577             :     integer, dimension(ndim) ::  & 
     578    35740224 :       ipivot
     579             : 
     580             :     integer ::  & 
     581             :       info,   & ! If this doesn't come back as 0, something went wrong
     582             :       imain  ! Main diagonal of the matrix
     583             : 
     584             :     integer :: i, j, d
     585             : 
     586             :     !-----------------------------------------------------------------------
     587             :     !       Reorder LU Matrix to use LAPACK band matrix format
     588             :     ! 
     589             :     !       Shift example for lulhs matrix given a 5x5 lhs matrix
     590             :     !           
     591             :     !                       
     592             :     !  lulhs =  
     593             :     !                            Columns
     594             :     !         1  2       3          4          5         6           7
     595             :     ! Rows       
     596             :     ! 1     [ 0  0       0          0      lhs(3,1)   lhs(4,2)   lhs(5,3) ]
     597             :     ! 2     [ 0  0       0      lhs(2,1)   lhs(3,2)   lhs(4,3)   lhs(5,4) ]
     598             :     ! 3     [ 0  0   lhs(1,1)   lhs(2,2)   lhs(3,3)   lhs(4,4)   lhs(5,5) ]
     599             :     ! 4     [ 0  0   lhs(1,2)   lhs(2,3)   lhs(3,4)   lhs(4,5)       0    ]
     600             :     ! 5     [ 0  0   lhs(1,3)   lhs(2,4)   lhs(3,5)       0          0    ]
     601             :     !                       
     602             :     !         all       lhs        lhs        lhs        lhs        lhs  
     603             :     !        set to   shifted     shifted      no      shifted    shifted
     604             :     !          0       down 2      down 1    shift      up 1        up 2
     605             :     ! 
     606             :     !   The first nsup columns of lulhs are always set to 0; 
     607             :     !   the rest of the columns are set to shifted 
     608             :     !   columns of lhs. This can be thought of as taking lhs, never touching the middle column, but
     609             :     !   shifting the columns that are n columns to the left of the middle down by n rows, and then
     610             :     !   shifting the columns that are n columns to the right of the middle up by n rows, finally 
     611             :     !   adding nsup columns of zeros onto the left of the array. This results in lulhs.
     612             :     !-----------------------------------------------------------------------
     613             : 
     614             :     ! Reorder lulhs, omitting the additional 2*nsub bands
     615             :     ! that are used for the LU decomposition of the matrix.
     616             : 
     617    17870112 :     imain = nsub + nsup + 1
     618             : 
     619             :     
     620             :     ! The first nsup rows of lulhs will contain 0s that are end-shifted lhs values. This needs
     621             :     ! to be handled differently so the algorithm to access lhs will not try to use out of bound
     622             :     ! values.
     623             :     !             ...   nsup     nsup+1    ...       imain   ... 
     624             :     !                        \ /                 \ /
     625             :     !                always   |  begins with nsup | all lhs values
     626             :     !                   0       0s, and decreases  
     627             :     !                           by one 0 each row 
     628             :     ! 
     629             :     !              ...  nsup     nsup+1    ...       imain   ... 
     630             :     ! lulhs(:,1) =  0     0       0         0         lhs    lhs
     631             :     ! lulhs(:,2) =  0     0       0        lhs        lhs    lhs
     632             :     ! 
     633             :     ! Since the first nsup rows are the first rows in lulhs, we're going to access them first to
     634             :     ! avoid out of order memory accesses.
     635   298389312 :     do i = 1, ngrdcol
     636   859427712 :       do d = 1, nsup
     637             : 
     638             :         ! Add 0s to first nsup columns, and decreasing number of end-shift affected columns
     639  2524672800 :         do j = 1, imain-d
     640  2524672800 :           lulhs(j,d,i) = 0.0_core_rknd
     641             :         end do
     642             : 
     643             :         ! Copy lhs values into appropriate lulhs spots
     644  2805192000 :         do j = imain-d+1, imain+nsub
     645  2524672800 :           lulhs(j,d,i) = lhs(j-nsub,i,d+j-imain)
     646             :         end do
     647             : 
     648             :       end do
     649             :     end do
     650             : 
     651             :     ! After the first nsup rows are dealt with, the offset lhs values can be copied into lulhs
     652             :     ! until the last nsup rows are reached. This is because the last nsup rows also contain
     653             :     ! end-shifted values, set to 0 in the next loop.
     654             :     ! 
     655             :     !                      ...  nsup     nsup+1    ...  
     656             :     !                                \ /   
     657             :     !                       always    |    all lhs values                
     658             :     ! 
     659             :     !                      ...  nsup     nsup+1    ...    
     660             :     ! lulhs(:,nsup+1)    =  0     0       lhs      lhs    
     661             :     ! lulhs(:,ndim-nsub) =  0     0       lhs      lhs    
     662             :     ! 
     663             :     ! For all values not affected by end-shifting
     664   298389312 :     do i = 1, ngrdcol
     665 46864576512 :       do d = nsup+1, ndim-nsub
     666             : 
     667             :         ! Set first nsup columns to 0
     668 >13969*10^7 :         do j = 1, nsub
     669 >13969*10^7 :           lulhs(j,d,i) = 0.0_core_rknd
     670             :         end do
     671             : 
     672             :         ! Copy lhs values into appropriate lulhs spots
     673 >27967*10^7 :         do j = imain-nsub, imain+nsub
     674 >27939*10^7 :           lulhs(j,d,i) = lhs(j-nsub,i, d+j-imain)
     675             :         end do
     676             : 
     677             :       end do
     678             :     end do
     679             : 
     680             : 
     681             :     ! The last nsup rows of lulhs will contain 0s that are end-shifted lhs values. This needs
     682             :     ! to be handled differently so the algorithm to access lhs will not try to use out of bound
     683             :     ! values.
     684             :     !             
     685             :     ! 
     686             :     !                        ...  nsup     nsup+1    ...      imain+1    ...    
     687             :     ! lulhs(:,ndim-nsub+1) =  0     0       lhs      lhs        lhs       0  
     688             :     ! lulhs(:,ndim)        =  0     0       lhs      lhs         0        0
     689             :     !
     690             :     !                                   |                   |   starts with one 0, then
     691             :     !                          always   |   all lhs values  |  then increases to nsup 0s
     692             :     !                                   |                   |       towards ndim
     693             :     !                                  / \                 / \
     694             :     !                        ...  nsup     nsup+1    ...          ndim-nsub+1 ... ndim
     695             :     ! 
     696             :     ! Finish the lulhs setup by accessing the last values last, keeping memory access ordered
     697   298389312 :     do i = 1, ngrdcol
     698   859427712 :       do d = ndim-nsub+1, ndim
     699             :     
     700             :         ! Set first nsup columns to 0
     701  1683115200 :         do j = 1, nsub
     702  1683115200 :           lulhs(j,d,i) = 0.0_core_rknd
     703             :         end do
     704             : 
     705             :         ! Copy lhs values into appropriate lulhs spots
     706  2524672800 :         do j = imain-nsup, imain-(d-ndim)
     707  2524672800 :           lulhs(j,d,i) = lhs(j-nsub,i, d+j-imain)
     708             :         end do
     709             : 
     710             :         ! Set increasing number of end-shift affected columns to 0
     711  1683115200 :         do j = imain-(d-ndim)+1, imain+nsub
     712  1402596000 :           lulhs(j,d,i) = 0.0_core_rknd
     713             :         end do
     714             :         
     715             :       end do
     716             :     end do
     717             : 
     718             : !-----------------------------------------------------------------------
     719             : !       *** LAPACK routine ***
     720             : !       SUBROUTINE DGBSV( N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, INFO )
     721             : !-----------------------------------------------------------------------
     722             : 
     723             :     ! Lapack general band solver, sgbsv for single 
     724             :     ! or dgbsv for double precision
     725   298389312 :     do i = 1, ngrdcol
     726             :       call lapack_gbsv( ndim, nsub, nsup, nrhs, lulhs(:,:,i), nsub*2+nsup+1,  & 
     727 >24014*10^7 :                         ipivot, rhs(i,:,:), ndim, info )
     728             :     end do
     729             : 
     730             : 
     731           0 :     select case( info )
     732             : 
     733             :     case( :-1 )
     734           0 :           write(fstderr,*) "in band_solve for ", trim( solve_type ), &
     735           0 :             ": illegal value for argument", -info
     736           0 :           err_code = clubb_fatal_error
     737             :     case( 0 )
     738             :           ! Success!
     739    17870112 :           if ( clubb_at_least_debug_level( 1 ) ) then
     740           0 :             do i = 1, ngrdcol
     741           0 :               if ( lapack_isnan( ndim, nrhs, rhs(i,:,:) ) ) then
     742           0 :                 err_code = clubb_fatal_error 
     743             :               end if
     744             :             end do
     745             :           end if
     746             : 
     747 >12687*10^7 :           soln = rhs
     748             : 
     749             :     case( 1: )
     750           0 :         write(fstderr,*) "in band_solve for ", trim( solve_type ), &
     751           0 :                        ": singular matrix, solution not computed"
     752    17870112 :         err_code = clubb_fatal_error
     753             :     end select
     754             : 
     755    17870112 :     return
     756             :   end subroutine lapack_band_solve
     757             : 
     758             : end module lapack_wrap

Generated by: LCOV version 1.14