d7/dd0/cacg_8f90_source.html

! Copyright (c) 2021-2025, The Neko Authors

! All rights reserved.

!

! Redistribution and use in source and binary forms, with or without

! modification, are permitted provided that the following conditions

! are met:

!

!   * Redistributions of source code must retain the above copyright

!     notice, this list of conditions and the following disclaimer.

!

!   * Redistributions in binary form must reproduce the above

!     copyright notice, this list of conditions and the following

!     disclaimer in the documentation and/or other materials provided

!     with the distribution.

!

!   * Neither the name of the authors nor the names of its

!     contributors may be used to endorse or promote products derived

!     from this software without specific prior written permission.

!

! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS

! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE

! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,

! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;

! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER

! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN

! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

! POSSIBILITY OF SUCH DAMAGE.

!

module cacg

  use num_types, only : rp

  use neko_config, only : neko_blk_size

  use krylov, only : ksp_t, ksp_monitor_t, ksp_max_iter

  use precon, only : pc_t

  use ax_product, only : ax_t

  use field, only : field_t

  use coefs, only : coef_t

  use gather_scatter, only : gs_t, gs_op_add

  use bc_list, only : bc_list_t

  use math, only : glsc3, rzero, copy, x_update, abscmp

  use utils, only : neko_warning

  use comm, only : pe_rank, neko_comm, mpi_real_precision

  use mpi_f08, only : mpi_allreduce, mpi_sum

  use mxm_wrapper

  implicit none

  private


  !> S-step communication avoiding preconditioned conjugate gradient method.
  type, public, extends(ksp_t) :: cacg_t
     !> Work vectors of length n (allocated in `init`): `r` holds the
     !! residual and `p` the search direction between outer iterations.
     real(kind=rp), allocatable, dimension(:) :: r, p
     !> Basis vectors, one per column, of shape (n, 4*s+1). Columns
     !! 1..2*s+1 are generated from `p`, columns 2*s+2..4*s+1 from `r`.
     real(kind=rp), allocatable, dimension(:,:) :: pr
     !> Number of CG steps carried out per outer iteration.
     integer :: s = 4
   contains
     !> Allocate the work arrays and set up the base Krylov type.
     procedure, pass(this) :: init => cacg_init
     !> Release the work arrays and detach the preconditioner.
     procedure, pass(this) :: free => cacg_free
     !> Solve a single scalar system.
     procedure, pass(this) :: solve => cacg_solve
     !> Solve three scalar systems, one after the other.
     procedure, pass(this) :: solve_coupled => cacg_solve_coupled
  end type cacg_t


contains


  !> Initialise a s-step CA PCG solver.
  !! Any previous state is released first, so the routine may be called
  !! repeatedly on the same object.
  !! @param n Length of the work vectors `r`, `p` and of each basis column.
  !! @param max_iter Maximum number of iterations, forwarded to `ksp_init`.
  !! @param M Preconditioner (optional). Only a pointer to it is stored.
  !! @param rel_tol Relative tolerance (optional), forwarded to `ksp_init`.
  !! @param abs_tol Absolute tolerance (optional), forwarded to `ksp_init`.
  !! @param monitor Monitoring flag (optional), forwarded to `ksp_init`.
  subroutine cacg_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
    class(cacg_t), target, intent(inout) :: this
    class(pc_t), optional, intent(in), target :: M
    integer, intent(in) :: n
    integer, intent(in) :: max_iter
    real(kind=rp), optional, intent(in) :: rel_tol
    real(kind=rp), optional, intent(in) :: abs_tol
    logical, optional, intent(in) :: monitor

    call this%free()

    ! Only one rank reports, to avoid one warning per MPI process.
    if (pe_rank .eq. 0) then
       call neko_warning("Communication Avoiding CG chosen,&
       & be aware of potential instabilities")
    end if

    allocate(this%r(n))
    allocate(this%p(n))
    ! 2*s+1 basis vectors generated from p plus 2*s generated from r.
    allocate(this%PR(n,4*this%s+1))

    if (present(M)) then
       this%M => M
    end if

    ! An absent optional dummy may be passed on as the actual argument of
    ! another optional dummy; it is then simply absent in the callee. This
    ! makes it unnecessary to branch on every combination of present
    ! arguments.
    call this%ksp_init(max_iter, rel_tol = rel_tol, abs_tol = abs_tol, &
         monitor = monitor)

  end subroutine cacg_init


  !> Deallocate a s-step CA PCG solver.
  !! Safe to call on an object whose arrays were never allocated.
  subroutine cacg_free(this)
    class(cacg_t), intent(inout) :: this

    ! Let the base Krylov type clean up before the local work arrays go.
    call this%ksp_free()

    if (allocated(this%PR)) deallocate(this%PR)
    if (allocated(this%r)) deallocate(this%r)
    if (allocated(this%p)) deallocate(this%p)

    ! The preconditioner is only pointed to, so it is detached rather
    ! than deallocated.
    nullify(this%M)

  end subroutine cacg_free


  !> S-step CA PCG solve.
  !! The solution field is zeroed on entry, so the solve always starts
  !! from a zero initial guess. Each outer iteration builds 4*s+1 basis
  !! vectors, forms their Gram matrix with a single global reduction,
  !! performs s CG steps on small coefficient vectors, and finally
  !! recovers the solution, search direction and residual from the basis.
  !! @param Ax Matrix-vector product operator.
  !! @param x Solution field; `x%x` is overwritten.
  !! @param f Right-hand side, of length n.
  !! @param n Number of local degrees of freedom.
  !! @param coef Coefficients; `coef%mult` weights the inner products and
  !! `coef%volume` scales the residual norm.
  !! @param blst Boundary conditions applied after every operator
  !! application.
  !! @param gs_h Gather-scatter handle used after every operator
  !! application.
  !! @param niter Optional iteration limit; `this%max_iter` is used when
  !! absent. The limit is only tested between outer iterations, so the
  !! reported count is always a multiple of s.
  !! @return Monitor data: initial and final residual norm, number of
  !! iterations and the convergence flag.
  function cacg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
    class(cacg_t), intent(inout) :: this
    class(ax_t), intent(in) :: ax
    type(field_t), intent(inout) :: x
    integer, intent(in) :: n
    real(kind=rp), dimension(n), intent(in) :: f
    type(coef_t), intent(inout) :: coef
    type(bc_list_t), intent(inout) :: blst
    type(gs_t), intent(inout) :: gs_h
    type(ksp_monitor_t) :: ksp_results
    integer, optional, intent(in) :: niter
    integer :: i, j, k, l, iter, max_iter, s, ierr, it
    real(kind=rp) :: rnorm, rtr, rtz1, tmp
    real(kind=rp) :: beta(this%s+1), alpha(this%s+1), alpha1, alpha2, norm_fac
    real(kind=rp), dimension(4*this%s+1,4*this%s+1) :: tt, g, gtt, temp, temp2
    ! Coefficients of p, r, z and the solution update in the basis PR,
    ! one column per inner step.
    real(kind=rp) :: p_c(4*this%s+1,this%s+1)
    real(kind=rp) :: r_c(4*this%s+1,this%s+1)
    real(kind=rp) :: z_c(4*this%s+1,this%s+1)
    real(kind=rp) :: x_c(4*this%s+1,this%s+1)

    associate(pr => this%PR, r => this%r, p => this%p)
      s = this%s
      if (present(niter)) then
         max_iter = niter
      else
         max_iter = this%max_iter
      end if
      norm_fac = 1.0_rp / sqrt(coef%volume)

      rtz1 = 1.0_rp
      ! Zero initial guess: the initial residual is the right-hand side.
      call rzero(x%x, n)
      call copy(r, f, n)
      call this%M%solve(p, r, n)

      rtr = glsc3(r, coef%mult, r, n)
      rnorm = sqrt(rtr)*norm_fac
      ksp_results%res_start = rnorm
      ksp_results%res_final = rnorm
      ksp_results%iter = 0
      iter = 0
      if(abscmp(rnorm, 0.0_rp)) then
         ! A zero residual means x = 0 already solves the system. Leave
         ! now: with a zero basis the Gram matrix vanishes and the step
         ! lengths below would evaluate 0/0, filling x with NaN.
         ksp_results%converged = .true.
         return
      end if
      call this%monitor_start('CACG')
      do while (iter < max_iter)

         ! Column 1 seeds the chain generated from p, column 2*s+2 the
         ! chain generated from r.
         call copy(pr,p, n)
         call copy(pr(1,2*s+2), r, n)

         !Here we have hardcoded a monomial basis atm.
         ! Chain from p: even columns apply the operator, odd columns the
         ! preconditioner.
         do i = 2, 2*s + 1
            if (mod(i,2) .eq. 0) then
               call ax%compute(pr(1,i), pr(1,i-1), coef, x%msh, x%Xh)
               call gs_h%gs_op_vector(pr(1,i), n, gs_op_add)
               call blst%apply_scalar(pr(1,i), n)
            else
               call this%M%solve(pr(1,i), pr(1,i-1), n)
            end if
         end do

         ! Chain from r: the preconditioner comes first, then the operator.
         do i = 2*s+2, 4*s
            if (mod(i,2) == 0) then
               call this%M%solve(pr(1,i+1), pr(1,i), n)
            else
               call ax%compute(pr(1,i+1), pr(1,i), coef, x%msh, x%Xh)
               call gs_h%gs_op_vector(pr(1,i+1), n, gs_op_add)
               call blst%apply_scalar(pr(1,1+i), n)
            end if
         end do

         ! In coefficient space p is unit vector 1 and r is unit vector
         ! 2*s+2; z follows from r through the basis matrix.
         call construct_basis_matrix(tt, s)
         call rzero(p_c, (4*s+1) * (s+1))
         p_c(1,1) = 1.0_rp
         call rzero(r_c, (4*s+1) * (s+1))
         r_c(2*s+2,1) = 1.0_rp
         call mxm(tt, 4*s+1, r_c, 4*s+1, z_c,s+1)
         call rzero(x_c, (4*s+1) * (s+1))
         call rzero(temp, (4*s+1)**2)

         ! Local part of the Gram matrix, accumulated block by block. Only
         ! the lower triangle is computed and it is stored packed: `it`
         ! runs linearly through `temp`, beyond the extent of its first
         ! dimension, relying on contiguous storage. `it` restarts for
         ! every block and ends up holding the packed length.
         do i = 0, n, neko_blk_size
            it = 0
            if (i + neko_blk_size .le. n) then
               do j = 1, 4*s+1
                  do l = 1, j
                     it = it + 1
                     do k = 1, neko_blk_size
                        temp(it,1) = temp(it,1) &
                             + pr(i+k,j) * pr(i+k,l) * coef%mult(i+k,1,1,1)
                     end do
                  end do
               end do
            else
               ! Remainder block with fewer than neko_blk_size entries.
               do j = 1, 4*s+1
                  do l = 1, j
                     it = it + 1
                     do k = 1, n-i
                        temp(it,1) = temp(it,1) &
                             + pr(i+k,j) * pr(i+k,l) * coef%mult(i+k,1,1,1)
                     end do
                  end do
               end do
            end if
         end do

         ! One reduction supplies every inner product of the next s steps.
         call mpi_allreduce(temp, temp2, it, &
              mpi_real_precision, mpi_sum, neko_comm, ierr)
         ! Unpack the triangle into the full symmetric matrix.
         it = 0
         do j = 1, 4*s+1
            do k = 1, j
               it = it + 1
               g(j,k) = temp2(it,1)
               g(k,j) = temp2(it,1)
            end do
         end do

         call mxm(g,4*s+1, tt, 4*s+1,gtt,4*s+1)

         ! s CG steps carried out on the coefficient vectors only.
         do j = 1, s
            iter = iter + 1

            call mxm(g, 4*s+1, r_c(1,j), 4*s+1,temp, 1)
            call mxm(gtt, 4*s+1, p_c(1,j), 4*s+1,temp2, 1)
            alpha1 = 0.0_rp
            alpha2 = 0.0_rp
            do i = 1,4*s+1
               alpha1 = alpha1 + temp(i,1) * z_c(i,j)
               alpha2 = alpha2 + temp2(i,1) * p_c(i,j)
            end do
            alpha(j) = alpha1/alpha2

            do i = 1, 4*s+1
               x_c(i,j+1) = x_c(i,j) + alpha(j) * p_c(i,j)
               tmp = 0.0_rp
               do k = 1, 4*s+1
                  tmp = tmp + tt(i,k) * p_c(k,j)
               end do
               r_c(i,j+1) = r_c(i,j) - alpha(j)*tmp
               tmp = 0.0_rp
               do k = 1, 4*s+1
                  tmp = tmp + tt(i,k)*r_c(k,j+1)
               end do
               z_c(i,j+1) = tmp
            end do

            call mxm(g,4*s+1,r_c(1,j+1),4*s+1,temp2,1)
            alpha2 = 0.0_rp
            do i = 1,4*s+1
               alpha2 = alpha2 + temp2(i,1)*z_c(i,j+1)
            end do
            ! alpha1 still holds the previous step's numerator here.
            beta(j) = alpha2 / alpha1
            do i = 1,4*s+1
               p_c(i,j+1) = z_c(i,j+1) + beta(j)*p_c(i,j)
            end do
         end do

         ! Recover x, p and r as linear combinations of the basis vectors
         ! and accumulate the local weighted residual norm on the way.
         call rzero(p, n)
         call rzero(r, n)
         rtr = 0.0_rp
         do i = 0, n, neko_blk_size
            if (i + neko_blk_size .le. n) then
               do j = 1, 4*s + 1
                  do k = 1, neko_blk_size
                     x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + pr(i+k,j) * x_c(j,s+1)
                     p(i+k) = p(i+k) + pr(i+k,j) * p_c(j,s+1)
                     tmp = pr(i+k,j) * r_c(j,s+1)
                     r(i+k) = r(i+k) + tmp
                  end do
               end do
               do k = 1, neko_blk_size
                  rtr = rtr + r(i+k)**2 * coef%mult(i+k,1,1,1)
               end do
            else
               ! Remainder block with fewer than neko_blk_size entries.
               do j = 1,4*s+1
                  do k = 1, n-i
                     x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + pr(i+k,j) * x_c(j,s+1)
                     p(i+k) = p(i+k) + pr(i+k,j) * p_c(j,s+1)
                     tmp = pr(i+k,j) * r_c(j,s+1)
                     r(i+k) = r(i+k) + tmp
                  end do
               end do
               do k = 1, n-i
                  rtr = rtr + r(i+k)**2 * coef%mult(i+k,1,1,1)
               end do
            end if
         end do

         call mpi_allreduce(rtr, tmp, 1, &
              mpi_real_precision, mpi_sum, neko_comm, ierr)
         rnorm = norm_fac*sqrt(tmp)
         call this%monitor_iter(iter, rnorm)
         if( rnorm <= this%abs_tol) exit
      end do
      call this%monitor_stop()
      ksp_results%res_final = rnorm
      ksp_results%iter = iter
      ksp_results%converged = this%is_converged(iter, rnorm)

    end associate

  end function cacg_solve


  !> Monomial matrix construction, not sparse.
  !! Fills the dense (4*s+1) x (4*s+1) matrix with zeros and puts ones on
  !! the first sub-diagonal in columns 1..2*s and 2*s+2..4*s. Column 2*s+1
  !! is left empty, so each of the two runs of columns is shifted
  !! separately.
  !! @param Tt Matrix to fill; any previous content is overwritten.
  !! @param s Number of steps per outer iteration.
  subroutine construct_basis_matrix(Tt, s)
    integer, intent(in) :: s
    real(kind=rp), intent(inout) :: tt(4*s+1,4*s+1)
    integer :: col, gap

    call rzero(tt, (4*s+1)**2)

    ! Column separating the two runs; it gets no sub-diagonal entry.
    gap = 2*s + 1
    do col = 1, 4*s
       if (col .eq. gap) cycle
       tt(col+1,col) = 1.0_rp
    end do

  end subroutine construct_basis_matrix


  !> S-step CA PCG coupled solve.
  !! The three components are solved independently, one after the other,
  !! each through the scalar `solve` binding with its own right-hand side
  !! and boundary condition list.
  !! @param Ax Matrix-vector product operator, shared by all components.
  !! @param x, y, z Solution fields of the three components.
  !! @param fx, fy, fz Right-hand sides of length n.
  !! @param n Number of local degrees of freedom.
  !! @param coef Coefficients, shared by all components.
  !! @param blstx, blsty, blstz Boundary conditions per component.
  !! @param gs_h Gather-scatter handle, shared by all components.
  !! @param niter Optional iteration limit, passed on to every solve.
  !! @return Monitor data of the x, y and z solves, in that order.
  function cacg_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
       n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
    class(cacg_t), intent(inout) :: this
    class(ax_t), intent(in) :: ax
    type(field_t), intent(inout) :: x, y, z
    integer, intent(in) :: n
    real(kind=rp), dimension(n), intent(in) :: fx, fy, fz
    type(coef_t), intent(inout) :: coef
    type(bc_list_t), intent(inout) :: blstx, blsty, blstz
    type(gs_t), intent(inout) :: gs_h
    type(ksp_monitor_t), dimension(3) :: ksp_results
    integer, optional, intent(in) :: niter

    ! x component
    ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
    ! y component
    ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
    ! z component
    ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)

  end function cacg_solve_coupled


end module cacg


solve
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Definition spalding_kernel.h:124

math::abscmp
Definition math.f90:77

ax_product
Defines a Matrix-vector product.
Definition ax.f90:34

bc_list
Defines a list of bc_t.
Definition bc_list.f90:34

cacg
Defines a communication avoiding Conjugate Gradient method.
Definition cacg.f90:34

cacg::cacg_solve
type(ksp_monitor_t) function cacg_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
S-step CA PCG solve.
Definition cacg.f90:135

cacg::construct_basis_matrix
subroutine construct_basis_matrix(tt, s)
Monomial matrix construction, not sparse.
Definition cacg.f90:337

cacg::cacg_solve_coupled
type(ksp_monitor_t) function, dimension(3) cacg_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
S-step CA PCG coupled solve.
Definition cacg.f90:353

cacg::cacg_free
subroutine cacg_free(this)
Deallocate a s-step CA PCG solver.
Definition cacg.f90:112

cacg::cacg_init
subroutine cacg_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a s-step CA PCG solver.
Definition cacg.f90:69

coefs
Coefficients.
Definition coef.f90:34

comm
Definition comm.F90:1

comm::mpi_real_precision
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:51

comm::pe_rank
integer, public pe_rank
MPI rank.
Definition comm.F90:56

comm::neko_comm
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:43

field
Defines a field.
Definition field.f90:34

gather_scatter
Gather-scatter.
Definition gather_scatter.f90:34

krylov
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34

krylov::ksp_max_iter
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition krylov.f90:51

math
Definition math.f90:60

math::glsc3
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product $a^T b c$.
Definition math.f90:1067

math::x_update
subroutine, public x_update(a, b, c, c1, c2, n)
Returns .
Definition math.f90:1033

math::copy
subroutine, public copy(a, b, n)
Copy a vector $a = b$.
Definition math.f90:249

math::rzero
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205

mxm_wrapper
Wrapper for all matrix-matrix product implementations.
Definition mxm_wrapper.F90:2

mxm_wrapper::mxm
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product $C = A \cdot B$ for contiguously packed matrices A, B, and C.
Definition mxm_wrapper.F90:29

neko_config
Build configurations.
Definition neko_config.f90:34

neko_config::neko_blk_size
integer, parameter neko_blk_size
Definition neko_config.f90:48

num_types
Definition num_types.f90:1

num_types::rp
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12

precon
Krylov preconditioner.
Definition precon.f90:34

utils
Utilities.
Definition utils.f90:35

utils::neko_warning
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Definition utils.f90:346

ax_product::ax_t
Base type for a matrix-vector product providing $Ax$.
Definition ax.f90:43

bc_list::bc_list_t
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48

cacg::cacg_t
S-step communication avoiding preconditioned conjugate gradient method.
Definition cacg.f90:53

coefs::coef_t
Coefficients defined on a given (mesh, $X_h$) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55

field::field_t
Definition field.f90:47

gather_scatter::gs_t
Definition gather_scatter.f90:68

krylov::ksp_monitor_t
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56

krylov::ksp_t
Base abstract type for a canonical Krylov method, solving $Ax = f$.
Definition krylov.f90:73

precon::pc_t
Defines a canonical Krylov preconditioner.
Definition precon.f90:40