51  use, 
intrinsic :: iso_c_binding
 
   57     real(kind=
rp), 
allocatable :: d(:)
 
   58     real(kind=
rp), 
allocatable :: w(:)
 
   59     real(kind=
rp), 
allocatable :: r(:)
 
   60     real(kind=
rp) :: omega
 
   63     integer :: max_iter = 10
 
   64     logical :: recompute_diag = .true.
 
 
   73     real(kind=
rp), 
allocatable :: d(:)
 
   74     type(c_ptr) :: d_d = c_null_ptr
 
   75     real(kind=
rp), 
allocatable :: w(:)
 
   76     type(c_ptr) :: w_d = c_null_ptr
 
   77     real(kind=
rp), 
allocatable :: r(:)
 
   78     type(c_ptr) :: r_d = c_null_ptr
 
   79     real(kind=
rp) :: tha, dlt
 
   82     integer :: power_its = 250
 
   83     integer :: max_iter = 10
 
   84     logical :: recompute_eigs = .true.
 
 
  101    integer, 
intent(in) :: n
 
  102    integer, 
intent(in) :: lvl
 
  103    integer, 
intent(in) :: max_iter
 
  115    this%max_iter = max_iter
 
  116    this%recompute_eigs = .true.
 
 
  129    integer, 
intent(in) :: n
 
  130    real(kind=
rp) :: lam, b, a, rn
 
  131    real(kind=
rp), 
parameter :: boost = 1.1_rp
 
  132    real(kind=
rp), 
parameter :: lam_factor = 30.0_rp
 
  133    real(kind=
rp) :: wtw, dtw, dtd
 
  135    associate(w => this%w, d => this%d, coef => amg%coef, gs_h => amg%gs_h, &
 
  136         msh => amg%msh, xh => amg%Xh, blst => amg%blst)
 
  143      if (this%lvl .eq. 0) 
then 
  144         call gs_h%op(d, n, gs_op_add)
 
  145         call blst%apply(d, n)
 
  148      do i = 1, this%power_its
 
  149         call amg%matvec(w, d, this%lvl)
 
  151         if (this%lvl .eq. 0) 
then 
  152            wtw = 
glsc3(w, coef%mult, w, n)
 
  157         call cmult2(d, w, 1.0_rp/sqrt(wtw), n)
 
  160      call amg%matvec(w, d, this%lvl)
 
  162      if (this%lvl .eq. 0) 
then 
  163         dtw = 
glsc3(d, coef%mult, w, n)
 
  164         dtd = 
glsc3(d, coef%mult, d, n)
 
  172      this%tha = (b+a)/2.0_rp
 
  173      this%dlt = (b-a)/2.0_rp
 
  175      this%recompute_eigs = .false.
 
 
  188    integer, 
intent(in) :: n
 
  189    real(kind=rp), 
dimension(n), 
intent(inout) :: x
 
  190    real(kind=rp), 
dimension(n), 
intent(inout) :: f
 
  191    class(tamg_hierarchy_t), 
intent(inout) :: amg
 
  192    type(ksp_monitor_t) :: ksp_results
 
  193    logical, 
optional, 
intent(in) :: zero_init
 
  194    integer :: iter, max_iter, i
 
  195    real(kind=rp) :: rtr, rnorm
 
  196    real(kind=rp) :: rhok, rhokp1, s1, thet, delt, tmp1, tmp2
 
  197    logical :: zero_initial_guess
 
  199    if (this%recompute_eigs) 
then 
  200       call this%comp_eig(amg, n)
 
  202    if (
present(zero_init)) 
then 
  203       zero_initial_guess = zero_init
 
  205       zero_initial_guess = .false.
 
  207    max_iter = this%max_iter
 
  209    associate( w => this%w, r => this%r, d => this%d, blst => amg%blst)
 
  211      if (.not. zero_initial_guess) 
then 
  212         call amg%matvec(w, x, this%lvl)
 
  222      do concurrent(i = 1:n)
 
  223         d(i) = 1.0_rp/thet * r(i)
 
  228      do iter = 2, max_iter
 
  229         call amg%matvec(w, d, this%lvl)
 
  231         rhokp1 = 1.0_rp / (2.0_rp * s1 - rhok)
 
  233         tmp2 = 2.0_rp * rhokp1 / delt
 
  236         do concurrent(i = 1:n)
 
  238            d(i) = tmp1 * d(i) + tmp2 * r(i)
 
 
  251    type(tamg_hierarchy_t), 
intent(inout) :: amg
 
  252    integer, 
intent(in) :: n
 
  253    real(kind=rp) :: lam, b, a, rn
 
  254    real(kind=rp), 
parameter :: boost = 1.1_rp
 
  255    real(kind=rp), 
parameter :: lam_factor = 30.0_rp
 
  256    real(kind=rp) :: wtw, dtw, dtd
 
  258    associate(w => this%w, d => this%d, coef => amg%coef, gs_h => amg%gs_h, &
 
  259         msh => amg%msh, xh => amg%Xh, blst => amg%blst)
 
  264      call device_memcpy(this%d, this%d_d, n, host_to_device, .true.)
 
  265      if (this%lvl .eq. 0) 
then 
  266         call gs_h%op(d, n, gs_op_add)
 
  267         call blst%apply(d, n)
 
  269      do i = 1, this%power_its
 
  270         call amg%device_matvec(w, d, this%w_d, this%d_d, this%lvl)
 
  272         if (this%lvl .eq. 0) 
then 
  273            wtw = device_glsc3(this%w_d, coef%mult_d, this%w_d, n)
 
  275            wtw = device_glsc2(this%w_d, this%w_d, n)
 
  278         call device_cmult2(this%d_d, this%w_d, 1.0_rp/sqrt(wtw), n)
 
  281      call amg%device_matvec(w, d, this%w_d, this%d_d, this%lvl)
 
  283      if (this%lvl .eq. 0) 
then 
  284         dtw = device_glsc3(this%d_d, coef%mult_d, this%w_d, n)
 
  285         dtd = device_glsc3(this%d_d, coef%mult_d, this%d_d, n)
 
  287         dtw = device_glsc2(this%d_d, this%w_d, n)
 
  288         dtd = device_glsc2(this%d_d, this%d_d, n)
 
  293      this%tha = (b+a)/2.0_rp
 
  294      this%dlt = (b-a)/2.0_rp
 
  296      this%recompute_eigs = .false.
 
 
  309    integer, 
intent(in) :: n
 
  310    real(kind=rp), 
dimension(n), 
intent(inout) :: x
 
  311    real(kind=rp), 
dimension(n), 
intent(inout) :: f
 
  314    class(tamg_hierarchy_t), 
intent(inout) :: amg
 
  315    type(ksp_monitor_t) :: ksp_results
 
  316    logical, 
optional, 
intent(in) :: zero_init
 
  317    integer :: iter, max_iter
 
  318    real(kind=rp) :: rtr, rnorm
 
  319    real(kind=rp) :: rhok, rhokp1, s1, thet, delt, tmp1, tmp2
 
  320    logical :: zero_initial_guess
 
  322    if (this%recompute_eigs) 
then 
  323       call this%device_comp_eig(amg, n)
 
  325    if (
present(zero_init)) 
then 
  326       zero_initial_guess = zero_init
 
  328       zero_initial_guess = .false.
 
  330    max_iter = this%max_iter
 
  332    associate( w_d => this%w_d, r_d => this%r_d, d_d => this%d_d, &
 
  335      if (.not. zero_initial_guess) 
then 
  336         call amg%device_matvec(this%w, x, w_d, x_d, this%lvl)
 
  346      call amg_device_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, &
 
  347           tmp1, n, zero_initial_guess)
 
  349      do iter = 2, max_iter
 
  350         call amg%device_matvec(this%w, this%d, w_d, d_d, this%lvl)
 
  352         rhokp1 = 1.0_rp / (2.0_rp * s1 - rhok)
 
  354         tmp2 = 2.0_rp * rhokp1 / delt
 
  357         call amg_device_cheby_solve_part2(r_d, w_d, d_d, x_d, tmp1, tmp2, n)
 
 
  369    integer, 
intent(in) :: n
 
  370    integer, 
intent(in) :: lvl
 
  371    integer, 
intent(in) :: max_iter
 
  378    this%max_iter = max_iter
 
 
  388    type(tamg_hierarchy_t), 
intent(inout) :: amg
 
  389    integer, 
intent(in) :: n
 
  393       call tamg_sample_matrix_val(val, amg, this%lvl, i, i)
 
  394       this%d(i) = 1.0_rp / val
 
  396    this%recompute_diag = .false.
 
 
  406    integer, 
intent(in) :: n
 
  407    real(kind=rp), 
dimension(n), 
intent(inout) :: x
 
  408    real(kind=rp), 
dimension(n), 
intent(inout) :: f
 
  409    class(tamg_hierarchy_t), 
intent(inout) :: amg
 
  410    type(ksp_monitor_t) :: ksp_results
 
  411    integer, 
optional, 
intent(in) :: niter
 
  412    integer :: iter, max_iter
 
  413    real(kind=rp) :: rtr, rnorm
 
  416    if (this%recompute_diag) 
then 
  417       call this%comp_diag(amg, n)
 
  420    if (
present(niter)) 
then 
  423       max_iter = this%max_iter
 
  427    associate( w => this%w, r => this%r, d => this%d)
 
  428      do iter = 1, max_iter
 
  431         call amg%matvec(w, x, this%lvl)
 
  438         call add2s2(x, r, this%omega, n)
 
 
  444    integer, 
intent(in) :: lvl
 
  446    character(len=LOG_SIZE) :: log_buf
 
  448    write(log_buf, 
'(A8,I2,A28)') 
'-- level', lvl, 
'-- init smoother: Chebyshev' 
  449    call neko_log%message(log_buf)
 
  450    write(log_buf, 
'(A22,I6)') 
'Iterations:', smoo%max_iter
 
  451    call neko_log%message(log_buf)
 
 
  455    integer, 
intent(in) :: lvl
 
  456    real(kind=rp), 
intent(in) :: lam
 
  457    character(len=LOG_SIZE) :: log_buf
 
  459    write(log_buf, 
'(A12,I2,A29,F12.3)') 
'-- AMG level', lvl, &
 
  460         '-- Chebyshev approx. max eig', lam
 
  461    call neko_log%message(log_buf)
 
 
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
 
Map a Fortran array to a device (allocate and associate)
 
Copy data between host and device (or device and device)
 
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
 
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n, strm)
Returns .
 
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
 
subroutine, public device_sub2(a_d, b_d, n, strm)
Vector substraction .
 
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
 
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
 
real(kind=rp) function, public device_glsc2(a_d, b_d, n, strm)
Weighted inner product .
 
subroutine, public device_cmult2(a_d, b_d, c, n, strm)
Multiplication by constant c .
 
Implements device kernels for use with TreeAMG smoothers.
 
subroutine, public amg_device_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, inv_thet, n, zero_initial)
 
subroutine, public amg_device_cheby_solve_part2(r_d, w_d, d_d, x_d, tmp1, tmp2, n)
 
Device abstraction, common interface for various accelerators.
 
integer, parameter, public host_to_device
 
subroutine, public device_free(x_d)
Deallocate memory on the device.
 
Implements the base abstract type for Krylov solvers plus helper types.
 
type(log_t), public neko_log
Global log stream.
 
integer, parameter, public log_size
 
subroutine, public cmult(a, c, n)
Multiplication by constant c .
 
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
 
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
 
real(kind=rp) function, public glsc2(a, b, n)
Weighted inner product .
 
subroutine, public add2(a, b, n)
Vector addition .
 
subroutine, public add3s2(a, b, c, c1, c2, n)
Returns .
 
subroutine, public col2(a, b, n)
Vector multiplication .
 
subroutine, public copy(a, b, n)
Copy a vector .
 
subroutine, public sub2(a, b, n)
Vector substraction .
 
subroutine, public add2s2(a, b, c1, n)
Vector addition with scalar multiplication  (multiplication on second argument)
 
integer, parameter neko_bcknd_device
 
integer, parameter, public rp
Global precision used in computations.
 
Implements smoothers for use with TreeAMG matrix vector product.
 
subroutine amg_jacobi_init(this, n, lvl, max_iter)
Initialization of Jacobi (this is expensive...)
 
subroutine amg_cheby_power(this, amg, n)
Power method to approximate largest eigenvalue.
 
subroutine amg_device_cheby_power(this, amg, n)
Power method to approximate largest eigenvalue.
 
subroutine amg_cheby_solve(this, x, f, n, amg, zero_init)
Chebyshev smoother From Saad's iterative methods textbook.
 
subroutine amg_cheby_init(this, n, lvl, max_iter)
Initialization of chebyshev.
 
subroutine amg_jacobi_diag(this, amg, n)
SAMPLE MATRIX DIAGONAL VALUES (DO NOT USE, EXPENSIVE)
 
subroutine amg_device_cheby_solve(this, x, f, x_d, f_d, n, amg, zero_init)
Chebyshev smoother From Saad's iterative methods textbook.
 
subroutine amg_jacobi_solve(this, x, f, n, amg, niter)
Jacobi smoother.
 
subroutine amg_smoo_monitor(lvl, smoo)
 
subroutine amg_cheby_monitor(lvl, lam)
 
Implements utilities for the TreeAMG hierarchy structure.
 
subroutine, public tamg_sample_matrix_val(val, amg, lvl, i, j)
Sample the values in a matix (expensive, use with caution)
 
Implements the base type for TreeAMG hierarchy structure.
 
A list of allocatable `bc_t`. Follows the standard interface of lists.
 
Type for storing initial and final residuals in a Krylov solver.
 
Type for a TreeAMG hierarchy.
 
Type for Chebyshev iteration using TreeAMG matvec.
 
Type for Chebyshev iteration using TreeAMG matvec.