51 use,
intrinsic :: iso_c_binding
57 real(kind=
rp),
allocatable :: d(:)
58 real(kind=
rp),
allocatable :: w(:)
59 real(kind=
rp),
allocatable :: r(:)
60 real(kind=
rp) :: omega
63 integer :: max_iter = 10
64 logical :: recompute_diag = .true.
74 real(kind=
rp),
allocatable :: d(:)
75 type(c_ptr) :: d_d = c_null_ptr
76 real(kind=
rp),
allocatable :: w(:)
77 type(c_ptr) :: w_d = c_null_ptr
78 real(kind=
rp),
allocatable :: r(:)
79 type(c_ptr) :: r_d = c_null_ptr
80 real(kind=
rp) :: tha, dlt
83 integer :: power_its = 250
84 integer :: max_iter = 10
85 logical :: recompute_eigs = .true.
103 integer,
intent(in) :: n
104 integer,
intent(in) :: lvl
105 integer,
intent(in) :: max_iter
117 this%max_iter = max_iter
118 this%recompute_eigs = .true.
127 if (
allocated(this%d))
then
133 if (
allocated(this%w))
then
139 if (
allocated(this%r))
then
154 integer,
intent(in) :: n
155 real(kind=
rp) :: lam, b, a, rn
156 real(kind=
rp),
parameter :: boost = 1.1_rp
157 real(kind=
rp),
parameter :: lam_factor = 30.0_rp
158 real(kind=
rp) :: wtw, dtw, dtd
160 associate(w => this%w, d => this%d, coef => amg%coef, gs_h => amg%gs_h, &
161 msh => amg%msh, xh => amg%Xh, blst => amg%blst)
168 if (this%lvl .eq. 0)
then
169 call gs_h%op(d, n, gs_op_add)
170 call blst%apply(d, n)
173 do i = 1, this%power_its
174 call amg%matvec(w, d, this%lvl)
176 if (this%lvl .eq. 0)
then
177 wtw =
glsc3(w, coef%mult, w, n)
182 call cmult2(d, w, 1.0_rp/sqrt(wtw), n)
185 call amg%matvec(w, d, this%lvl)
187 if (this%lvl .eq. 0)
then
188 dtw =
glsc3(d, coef%mult, w, n)
189 dtd =
glsc3(d, coef%mult, d, n)
197 this%tha = (b+a)/2.0_rp
198 this%dlt = (b-a)/2.0_rp
200 this%recompute_eigs = .false.
213 integer,
intent(in) :: n
214 real(kind=rp),
dimension(n),
intent(inout) :: x
215 real(kind=rp),
dimension(n),
intent(inout) :: f
216 class(tamg_hierarchy_t),
intent(inout) :: amg
217 type(ksp_monitor_t) :: ksp_results
218 logical,
optional,
intent(in) :: zero_init
219 integer :: iter, max_iter, i
220 real(kind=rp) :: rtr, rnorm
221 real(kind=rp) :: rhok, rhokp1, s1, thet, delt, tmp1, tmp2
222 logical :: zero_initial_guess
224 if (this%recompute_eigs)
then
225 call this%comp_eig(amg, n)
227 if (
present(zero_init))
then
228 zero_initial_guess = zero_init
230 zero_initial_guess = .false.
232 max_iter = this%max_iter
234 associate( w => this%w, r => this%r, d => this%d, blst => amg%blst)
236 if (.not. zero_initial_guess)
then
237 call amg%matvec(w, x, this%lvl)
252 d(i) = 1.0_rp/thet * r(i)
258 do iter = 2, max_iter
259 call amg%matvec(w, d, this%lvl)
261 rhokp1 = 1.0_rp / (2.0_rp * s1 - rhok)
263 tmp2 = 2.0_rp * rhokp1 / delt
273 d(i) = tmp1 * d(i) + tmp2 * r(i)
287 type(tamg_hierarchy_t),
intent(inout) :: amg
288 integer,
intent(in) :: n
289 real(kind=rp) :: lam, b, a, rn
290 real(kind=rp),
parameter :: boost = 1.1_rp
291 real(kind=rp),
parameter :: lam_factor = 30.0_rp
292 real(kind=rp) :: wtw, dtw, dtd
294 associate(w => this%w, d => this%d, coef => amg%coef, gs_h => amg%gs_h, &
295 msh => amg%msh, xh => amg%Xh, blst => amg%blst)
300 call device_memcpy(this%d, this%d_d, n, host_to_device, .true.)
301 if (this%lvl .eq. 0)
then
302 call gs_h%op(d, n, gs_op_add)
303 call blst%apply(d, n)
305 do i = 1, this%power_its
306 call amg%device_matvec(w, d, this%w_d, this%d_d, this%lvl)
308 if (this%lvl .eq. 0)
then
309 wtw = device_glsc3(this%w_d, coef%mult_d, this%w_d, n)
311 wtw = device_glsc2(this%w_d, this%w_d, n)
314 call device_cmult2(this%d_d, this%w_d, 1.0_rp/sqrt(wtw), n)
317 call amg%device_matvec(w, d, this%w_d, this%d_d, this%lvl)
319 if (this%lvl .eq. 0)
then
320 dtw = device_glsc3(this%d_d, coef%mult_d, this%w_d, n)
321 dtd = device_glsc3(this%d_d, coef%mult_d, this%d_d, n)
323 dtw = device_glsc2(this%d_d, this%w_d, n)
324 dtd = device_glsc2(this%d_d, this%d_d, n)
329 this%tha = (b+a)/2.0_rp
330 this%dlt = (b-a)/2.0_rp
332 this%recompute_eigs = .false.
345 integer,
intent(in) :: n
346 real(kind=rp),
dimension(n),
intent(inout) :: x
347 real(kind=rp),
dimension(n),
intent(inout) :: f
350 class(tamg_hierarchy_t),
intent(inout) :: amg
351 type(ksp_monitor_t) :: ksp_results
352 logical,
optional,
intent(in) :: zero_init
353 integer :: iter, max_iter
354 real(kind=rp) :: rtr, rnorm
355 real(kind=rp) :: rhok, rhokp1, s1, thet, delt, tmp1, tmp2
356 logical :: zero_initial_guess
358 if (this%recompute_eigs)
then
359 call this%device_comp_eig(amg, n)
361 if (
present(zero_init))
then
362 zero_initial_guess = zero_init
364 zero_initial_guess = .false.
366 max_iter = this%max_iter
368 associate( w_d => this%w_d, r_d => this%r_d, d_d => this%d_d, &
371 if (.not. zero_initial_guess)
then
372 call amg%device_matvec(this%w, x, w_d, x_d, this%lvl)
382 call amg_device_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, &
383 tmp1, n, zero_initial_guess)
385 do iter = 2, max_iter
386 call amg%device_matvec(this%w, this%d, w_d, d_d, this%lvl)
388 rhokp1 = 1.0_rp / (2.0_rp * s1 - rhok)
390 tmp2 = 2.0_rp * rhokp1 / delt
393 call amg_device_cheby_solve_part2(r_d, w_d, d_d, x_d, tmp1, tmp2, n)
405 integer,
intent(in) :: n
406 integer,
intent(in) :: lvl
407 integer,
intent(in) :: max_iter
414 this%max_iter = max_iter
422 if (
allocated(this%d))
then
425 if (
allocated(this%w))
then
428 if (
allocated(this%r))
then
438 type(tamg_hierarchy_t),
intent(inout) :: amg
439 integer,
intent(in) :: n
443 call tamg_sample_matrix_val(val, amg, this%lvl, i, i)
444 this%d(i) = 1.0_rp / val
446 this%recompute_diag = .false.
456 integer,
intent(in) :: n
457 real(kind=rp),
dimension(n),
intent(inout) :: x
458 real(kind=rp),
dimension(n),
intent(inout) :: f
459 class(tamg_hierarchy_t),
intent(inout) :: amg
460 type(ksp_monitor_t) :: ksp_results
461 integer,
optional,
intent(in) :: niter
462 integer :: iter, max_iter
463 real(kind=rp) :: rtr, rnorm
466 if (this%recompute_diag)
then
467 call this%comp_diag(amg, n)
470 if (
present(niter))
then
473 max_iter = this%max_iter
477 associate( w => this%w, r => this%r, d => this%d)
478 do iter = 1, max_iter
481 call amg%matvec(w, x, this%lvl)
498 x(i) = x(i) + this%omega * r(i)
507 integer,
intent(in) :: lvl
509 character(len=LOG_SIZE) :: log_buf
511 write(log_buf,
'(A8,I2,A28)')
'-- level', lvl,
'-- init smoother: Chebyshev'
512 call neko_log%message(log_buf)
513 write(log_buf,
'(A22,I6)')
'Iterations:', smoo%max_iter
514 call neko_log%message(log_buf)
518 integer,
intent(in) :: lvl
519 real(kind=rp),
intent(in) :: lam
520 character(len=LOG_SIZE) :: log_buf
522 write(log_buf,
'(A12,I2,A29,F12.3)')
'-- AMG level', lvl, &
523 '-- Chebyshev approx. max eig', lam
524 call neko_log%message(log_buf)
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
Unmap a Fortran array from a device (deassociate and free)
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n, strm)
Returns .
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_sub2(a_d, b_d, n, strm)
Vector substraction .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
real(kind=rp) function, public device_glsc2(a_d, b_d, n, strm)
Weighted inner product .
subroutine, public device_cmult2(a_d, b_d, c, n, strm)
Multiplication by constant c .
Implements device kernels for use with TreeAMG smoothers.
subroutine, public amg_device_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, inv_thet, n, zero_initial)
subroutine, public amg_device_cheby_solve_part2(r_d, w_d, d_d, x_d, tmp1, tmp2, n)
Device abstraction, common interface for various accelerators.
integer, parameter, public host_to_device
Implements the base abstract type for Krylov solvers plus helper types.
type(log_t), public neko_log
Global log stream.
integer, parameter, public log_size
subroutine, public cmult(a, c, n)
Multiplication by constant c .
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
real(kind=rp) function, public glsc2(a, b, n)
Weighted inner product .
subroutine, public add2(a, b, n)
Vector addition .
subroutine, public add3s2(a, b, c, c1, c2, n)
Returns .
subroutine, public col2(a, b, n)
Vector multiplication .
subroutine, public copy(a, b, n)
Copy a vector .
subroutine, public sub2(a, b, n)
Vector substraction .
subroutine, public add2s2(a, b, c1, n)
Vector addition with scalar multiplication (multiplication on second argument)
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Implements smoothers for use with TreeAMG matrix vector product.
subroutine amg_cheby_free(this)
free cheby data
subroutine amg_jacobi_init(this, n, lvl, max_iter)
Initialization of Jacobi (this is expensive...)
subroutine amg_cheby_power(this, amg, n)
Power method to approximate largest eigenvalue.
subroutine amg_device_cheby_power(this, amg, n)
Power method to approximate largest eigenvalue.
subroutine amg_cheby_solve(this, x, f, n, amg, zero_init)
Chebyshev smoother From Saad's iterative methods textbook.
subroutine amg_cheby_init(this, n, lvl, max_iter)
Initialization of chebyshev.
subroutine amg_jacobi_free(this)
free jacobi data
subroutine amg_jacobi_diag(this, amg, n)
SAMPLE MATRIX DIAGONAL VALUES (DO NOT USE, EXPENSIVE)
subroutine amg_device_cheby_solve(this, x, f, x_d, f_d, n, amg, zero_init)
Chebyshev smoother From Saad's iterative methods textbook.
subroutine amg_jacobi_solve(this, x, f, n, amg, niter)
Jacobi smoother.
subroutine amg_smoo_monitor(lvl, smoo)
subroutine amg_cheby_monitor(lvl, lam)
Implements utilities for the TreeAMG hierarchy structure.
subroutine, public tamg_sample_matrix_val(val, amg, lvl, i, j)
Sample the values in a matix (expensive, use with caution)
Implements the base type for TreeAMG hierarchy structure.
A list of allocatable `bc_t`. Follows the standard interface of lists.
Type for storing initial and final residuals in a Krylov solver.
Type for a TreeAMG hierarchy.
Type for Chebyshev iteration using TreeAMG matvec.
Type for Chebyshev iteration using TreeAMG matvec.