LCOV - code coverage report
Current view: top level - dynamics/fv - par_xsum.F90 (source / functions) Hit Total Coverage
Test: coverage.info Lines: 18 72 25.0 %
Date: 2025-03-14 01:23:43 Functions: 1 2 50.0 %

          Line data    Source code
       1             : !-----------------------------------------------------------------------
       2             : !BOP
       3             : ! !ROUTINE: par_xsum --- Calculate x-sum bit-wise consistently
       4             : !
       5             : ! !INTERFACE:
       6             : !****6***0*********0*********0*********0*********0*********0**********72
       7      396048 :       subroutine par_xsum(grid, a, ltot, sum)
       8             : !****6***0*********0*********0*********0*********0*********0**********72
       9             : !
      10             : ! !USES:
      11             : #if defined ( SPMD )
      12             :       use parutilitiesmodule, only : parexchangevector
      13             : #endif
      14             :       use dynamics_vars, only : T_FVDYCORE_GRID
      15             :       use shr_kind_mod, only: r8 => shr_kind_r8
      16             :       use shr_reprosum_mod, only : shr_reprosum_calc, shr_reprosum_tolExceeded, &
      17             :                                 shr_reprosum_reldiffmax, &
      18             :                                 shr_reprosum_recompute
      19             :       use cam_logfile,   only : iulog
      20             :       use FVperf_module, only : FVstartclock, FVstopclock
      21             : 
      22             :       implicit none
      23             : 
      24             : ! !INPUT PARAMETERS:
      25             :       type (T_FVDYCORE_GRID), intent(in) :: grid
      26             :       integer, intent(in) :: ltot       ! number of quantities to be summed
      27             :       ! input vector to be summed
      28             :       real (r8), intent(in) :: a(grid%ifirstxy:grid%ilastxy,ltot)
      29             : 
      30             : ! !OUTPUT PARAMETERS:
      31             :       real (r8) sum(ltot)               ! sum of all vector entries
      32             : 
      33             : ! !DESCRIPTION:
      34             : !     This subroutine calculates the sum of "a" in a reproducible
      35             : !     (sequentialized) fashion which should give bit-wise identical
      36             : !     results irrespective of the number of MPI processes.
      37             : !
      38             : ! !CALLED FROM:
      39             : !     te_map
      40             : !
      41             : ! !REVISION HISTORY:
      42             : !
      43             : !     AAM 00.11.01 : Created
      44             : !     WS  03.10.22 : pmgrid removed (now spmd_dyn)
      45             : !     WS  04.10.04 : added grid as an argument; removed spmd_dyn
      46             : !     WS  05.05.25 : removed ifirst, ilast, im as arguments (in grid)
      47             : !     PW  08.06.25 : added fixed point reproducible sum
      48             : !
      49             : !EOP
      50             : !---------------------------------------------------------------------
      51             : !BOC
      52             :  
      53             : ! !Local
      54             :       real(r8), parameter ::  D0_0                    =  0.0_r8
      55             : 
      56      792096 :       real(r8) :: rel_diff(2,ltot)
      57      396048 :       real(r8),allocatable :: l_a(:)
      58      396048 :       real(r8),allocatable :: a_tmp(:)
      59             : 
      60             :       integer :: i,ipe,l,im,lim,nprxy_x,offset
      61      792096 :       integer :: sendcount(grid%nprxy_x)
      62      792096 :       integer :: recvcount(grid%nprxy_x)
      63             : 
      64             :       logical :: write_warning
      65             : 
      66      396048 :       im  = grid%im
      67      396048 :       lim = (grid%ilastxy-grid%ifirstxy) + 1
      68      396048 :       nprxy_x = grid%nprxy_x
      69      396048 :       offset  = grid%ifirstxy - 1
      70             : 
      71      396048 :       call FVstartclock(grid,'xsum_reprosum')
      72             :       call shr_reprosum_calc(a, sum, lim, lim, ltot, gbl_count=im, &
      73      396048 :                      commid=grid%commxy_x, rel_diff=rel_diff)
      74      396048 :       call FVstopclock(grid,'xsum_reprosum')
      75             : 
      76             :       ! check that "fast" reproducible sum is accurate enough. If not, calculate
      77             :       ! using old method
      78      396048 :       write_warning = .false.
      79      396048 :       if (grid%myidxy_x == 0) write_warning = .true.
      80      396048 :       if ( shr_reprosum_tolExceeded('par_xsum', ltot, write_warning, &
      81             :            iulog, rel_diff) ) then
      82           0 :          if ( shr_reprosum_recompute ) then
      83           0 :             call FVstartclock(grid,'xsum_sumfix')
      84           0 :             allocate( l_a(lim*nprxy_x) )
      85           0 :             allocate( a_tmp(im) )
      86           0 :             sendcount(:) = lim
      87             : 
      88           0 :             do l=1,ltot
      89           0 :                if (rel_diff(1,l) > shr_reprosum_reldiffmax) then
      90           0 :                   sum(l) = D0_0
      91             : #if defined ( SPMD )
      92           0 :                   do ipe=1,nprxy_x
      93           0 :                      do i=1,lim
      94           0 :                         l_a(i+(ipe-1)*lim) = a(i+offset,l)
      95             :                      enddo
      96             :                   enddo
      97             :                   call parexchangevector( grid%commxy_x, sendcount, l_a, &
      98           0 :                                           recvcount, a_tmp )
      99           0 :                   do i=1,im
     100           0 :                      sum(l) = sum(l) + a_tmp(i)
     101             :                   enddo
     102             : #else
     103             :                   do i=1,im
     104             :                      sum(l) = sum(l) + a(i,l)
     105             :                   enddo
     106             : #endif
     107             :                endif
     108             : 
     109             :             enddo
     110             : 
     111           0 :             deallocate( a_tmp )
     112           0 :             deallocate( l_a )
     113           0 :             call FVstopclock(grid,'xsum_sumfix')
     114             :          endif
     115             :       endif
     116             : 
     117      396048 :       return
     118             : !EOC
     119      396048 :       end subroutine par_xsum
     120             : !-----------------------------------------------------------------------
     121             : 
     122             : !-----------------------------------------------------------------------
     123             : !BOP
     124             : ! !ROUTINE: par_xsum_r4 --- Calculate x-sum bit-wise consistently (real4)
     125             : !
     126             : ! !INTERFACE:
     127             : !****6***0*********0*********0*********0*********0*********0**********72
     128           0 :       subroutine par_xsum_r4(grid, a, ltot, sum)
     129             : !****6***0*********0*********0*********0*********0*********0**********72
     130             : !
     131             : ! !USES:
     132             : #if defined ( SPMD )
     133             :       use parutilitiesmodule, only : parexchangevector
     134             : #endif
     135             :       use dynamics_vars, only : T_FVDYCORE_GRID
     136             :       use shr_kind_mod, only: r8 => shr_kind_r8, r4 => shr_kind_r4
     137             :       use shr_reprosum_mod, only : shr_reprosum_calc, shr_reprosum_tolExceeded, &
     138             :                                 shr_reprosum_reldiffmax, &
     139             :                                 shr_reprosum_recompute
     140             :       use cam_logfile,   only : iulog
     141             :       use FVperf_module, only : FVstartclock, FVstopclock
     142             : 
     143             :       implicit none
     144             : 
     145             : ! !INPUT PARAMETERS:
     146             :       type (T_FVDYCORE_GRID), intent(in) :: grid
     147             :       integer, intent(in) :: ltot       ! number of quantities to be summed
     148             :       real (r4) a(grid%ifirstxy:grid%ilastxy,ltot)    ! input vector to be summed
     149             : 
     150             : ! !OUTPUT PARAMETERS:
     151             :       real (r8) sum(ltot)               ! sum of all vector entries
     152             : 
     153             : ! !DESCRIPTION:
     154             : !     This subroutine calculates the sum of "a" in a reproducible
     155             : !     (sequentialized) fashion which should give bit-wise identical
     156             : !     results irrespective of the number of MPI processes.
     157             : !
     158             : ! !REVISION HISTORY:
     159             : !
     160             : !     WS  05.04.08 : Created from par_xsum
     161             : !     WS  05.05.25 : removed ifirst, ilast, im as arguments (in grid)
     162             : !     WS  06.06.28 : Fixed bug in sequential version
     163             : !     PW  08.06.25 : added fixed point reproducible sum
     164             : !
     165             : !EOP
     166             : !---------------------------------------------------------------------
     167             : !BOC
     168             :  
     169             : ! !Local
     170             :       real(r8), parameter ::  D0_0                    =  0.0_r8
     171             : 
     172           0 :       real(r8) :: a8(grid%ifirstxy:grid%ilastxy,ltot)
     173           0 :       real(r8) :: rel_diff(2,ltot)
     174           0 :       real(r4),allocatable :: l_a(:)
     175           0 :       real(r4),allocatable :: a_tmp(:)
     176             : 
     177             :       integer i,ipe,l,im,lim,nprxy_x,offset
     178           0 :       integer sendcount(grid%nprxy_x)
     179           0 :       integer recvcount(grid%nprxy_x)
     180             : 
     181             :       logical :: write_warning
     182             : 
     183           0 :       im  = grid%im
     184           0 :       lim = (grid%ilastxy-grid%ifirstxy) + 1
     185           0 :       nprxy_x = grid%nprxy_x
     186           0 :       offset  = grid%ifirstxy - 1
     187             : 
     188           0 :       call FVstartclock(grid,'xsum_r4_reprosum')
     189           0 :       a8(:,:) = a(:,:)
     190             :       call shr_reprosum_calc(a8, sum, lim, lim, ltot, gbl_count=im, &
     191           0 :                      commid=grid%commxy_x, rel_diff=rel_diff)
     192           0 :       call FVstopclock(grid,'xsum_r4_reprosum')
     193             : 
     194             :       ! check that "fast" reproducible sum is accurate enough. If not, calculate
     195             :       ! using old method
     196           0 :       write_warning = .false.
     197           0 :       if (grid%myidxy_x == 0) write_warning = .true.
     198           0 :       if ( shr_reprosum_tolExceeded('par_xsum_r4', ltot, write_warning, &
     199             :            iulog, rel_diff) ) then
     200           0 :          if ( shr_reprosum_recompute ) then
     201           0 :             call FVstartclock(grid,'xsum_r4_sumfix')
     202           0 :             allocate( l_a(lim*nprxy_x) )
     203           0 :             allocate( a_tmp(im) )
     204           0 :             sendcount(:) = lim
     205             : 
     206           0 :             do l=1,ltot
     207           0 :                if (rel_diff(1,l) > shr_reprosum_reldiffmax) then
     208           0 :                   sum(l) = D0_0
     209             : #if defined ( SPMD )
     210           0 :                   do ipe=1,nprxy_x
     211           0 :                      do i=1,lim
     212           0 :                         l_a(i+(ipe-1)*lim) = a(i+offset,l)
     213             :                      enddo
     214             :                   enddo
     215             :                   call parexchangevector( grid%commxy_x, sendcount, l_a, &
     216           0 :                                           recvcount, a_tmp )
     217           0 :                   do i=1,im
     218           0 :                      sum(l) = sum(l) + a_tmp(i)
     219             :                   enddo
     220             : #else
     221             :                   do i=1,im
     222             :                      sum(l) = sum(l) + a(i,l)
     223             :                   enddo
     224             : #endif
     225             :                endif
     226             : 
     227             :             enddo
     228             : 
     229           0 :             deallocate( a_tmp )
     230           0 :             deallocate( l_a )
     231           0 :             call FVstopclock(grid,'xsum_r4_sumfix')
     232             :          endif
     233             :       endif
     234             : 
     235           0 :       return
     236             : !EOC
     237           0 :       end subroutine par_xsum_r4
     238             : !-----------------------------------------------------------------------

Generated by: LCOV version 1.14