52 use,
intrinsic :: iso_c_binding
58 real(kind=
rp),
allocatable :: d(:)
59 real(kind=
rp),
allocatable :: w(:)
60 real(kind=
rp),
allocatable :: r(:)
61 real(kind=
rp) :: omega
64 integer :: max_iter = 10
65 logical :: recompute_diag = .true.
75 real(kind=
rp),
allocatable :: d(:)
76 type(c_ptr) :: d_d = c_null_ptr
77 real(kind=
rp),
allocatable :: w(:)
78 type(c_ptr) :: w_d = c_null_ptr
79 real(kind=
rp),
allocatable :: r(:)
80 type(c_ptr) :: r_d = c_null_ptr
81 real(kind=
rp) :: tha, dlt
84 integer :: power_its = 250
85 integer :: max_iter = 10
86 logical :: recompute_eigs = .true.
104 integer,
intent(in) :: n
105 integer,
intent(in) :: lvl
106 integer,
intent(in) :: max_iter
118 this%max_iter = max_iter
119 this%recompute_eigs = .true.
133 if (
allocated(this%d))
then
136 if (
allocated(this%w))
then
139 if (
allocated(this%r))
then
156 integer,
intent(in) :: n
157 real(kind=
rp) :: lam, b, a, rn
158 real(kind=
rp),
parameter :: boost = 1.1_rp
159 real(kind=
rp),
parameter :: lam_factor = 30.0_rp
160 real(kind=
rp) :: wtw, dtw, dtd
162 associate(w => this%w, d => this%d, coef => amg%coef, gs_h => amg%gs_h, &
163 msh => amg%msh, xh => amg%Xh, blst => amg%blst)
170 if (this%lvl .eq. 0)
then
171 call gs_h%op(d, n, gs_op_add)
172 call blst%apply(d, n)
175 do i = 1, this%power_its
176 call amg%matvec(w, d, this%lvl)
178 if (this%lvl .eq. 0)
then
179 wtw =
glsc3(w, coef%mult, w, n)
184 call cmult2(d, w, 1.0_rp/sqrt(wtw), n)
187 call amg%matvec(w, d, this%lvl)
189 if (this%lvl .eq. 0)
then
190 dtw =
glsc3(d, coef%mult, w, n)
191 dtd =
glsc3(d, coef%mult, d, n)
199 this%tha = (b+a)/2.0_rp
200 this%dlt = (b-a)/2.0_rp
202 this%recompute_eigs = .false.
215 integer,
intent(in) :: n
216 real(kind=rp),
dimension(n),
intent(inout) :: x
217 real(kind=rp),
dimension(n),
intent(inout) :: f
218 class(tamg_hierarchy_t),
intent(inout) :: amg
219 type(ksp_monitor_t) :: ksp_results
220 logical,
optional,
intent(in) :: zero_init
221 integer :: iter, max_iter, i
222 real(kind=rp) :: rtr, rnorm
223 real(kind=rp) :: rhok, rhokp1, s1, thet, delt, tmp1, tmp2
224 logical :: zero_initial_guess
226 if (this%recompute_eigs)
then
227 call this%comp_eig(amg, n)
229 if (
present(zero_init))
then
230 zero_initial_guess = zero_init
232 zero_initial_guess = .false.
234 max_iter = this%max_iter
236 associate( w => this%w, r => this%r, d => this%d, blst => amg%blst)
238 if (.not. zero_initial_guess)
then
239 call amg%matvec(w, x, this%lvl)
249 do concurrent(i = 1:n)
250 d(i) = 1.0_rp/thet * r(i)
255 do iter = 2, max_iter
256 call amg%matvec(w, d, this%lvl)
258 rhokp1 = 1.0_rp / (2.0_rp * s1 - rhok)
260 tmp2 = 2.0_rp * rhokp1 / delt
263 do concurrent(i = 1:n)
265 d(i) = tmp1 * d(i) + tmp2 * r(i)
278 type(tamg_hierarchy_t),
intent(inout) :: amg
279 integer,
intent(in) :: n
280 real(kind=rp) :: lam, b, a, rn
281 real(kind=rp),
parameter :: boost = 1.1_rp
282 real(kind=rp),
parameter :: lam_factor = 30.0_rp
283 real(kind=rp) :: wtw, dtw, dtd
285 associate(w => this%w, d => this%d, coef => amg%coef, gs_h => amg%gs_h, &
286 msh => amg%msh, xh => amg%Xh, blst => amg%blst)
291 call device_memcpy(this%d, this%d_d, n, host_to_device, .true.)
292 if (this%lvl .eq. 0)
then
293 call gs_h%op(d, n, gs_op_add)
294 call blst%apply(d, n)
296 do i = 1, this%power_its
297 call amg%device_matvec(w, d, this%w_d, this%d_d, this%lvl)
299 if (this%lvl .eq. 0)
then
300 wtw = device_glsc3(this%w_d, coef%mult_d, this%w_d, n)
302 wtw = device_glsc2(this%w_d, this%w_d, n)
305 call device_cmult2(this%d_d, this%w_d, 1.0_rp/sqrt(wtw), n)
308 call amg%device_matvec(w, d, this%w_d, this%d_d, this%lvl)
310 if (this%lvl .eq. 0)
then
311 dtw = device_glsc3(this%d_d, coef%mult_d, this%w_d, n)
312 dtd = device_glsc3(this%d_d, coef%mult_d, this%d_d, n)
314 dtw = device_glsc2(this%d_d, this%w_d, n)
315 dtd = device_glsc2(this%d_d, this%d_d, n)
320 this%tha = (b+a)/2.0_rp
321 this%dlt = (b-a)/2.0_rp
323 this%recompute_eigs = .false.
336 integer,
intent(in) :: n
337 real(kind=rp),
dimension(n),
intent(inout) :: x
338 real(kind=rp),
dimension(n),
intent(inout) :: f
341 class(tamg_hierarchy_t),
intent(inout) :: amg
342 type(ksp_monitor_t) :: ksp_results
343 logical,
optional,
intent(in) :: zero_init
344 integer :: iter, max_iter
345 real(kind=rp) :: rtr, rnorm
346 real(kind=rp) :: rhok, rhokp1, s1, thet, delt, tmp1, tmp2
347 logical :: zero_initial_guess
349 if (this%recompute_eigs)
then
350 call this%device_comp_eig(amg, n)
352 if (
present(zero_init))
then
353 zero_initial_guess = zero_init
355 zero_initial_guess = .false.
357 max_iter = this%max_iter
359 associate( w_d => this%w_d, r_d => this%r_d, d_d => this%d_d, &
362 if (.not. zero_initial_guess)
then
363 call amg%device_matvec(this%w, x, w_d, x_d, this%lvl)
373 call amg_device_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, &
374 tmp1, n, zero_initial_guess)
376 do iter = 2, max_iter
377 call amg%device_matvec(this%w, this%d, w_d, d_d, this%lvl)
379 rhokp1 = 1.0_rp / (2.0_rp * s1 - rhok)
381 tmp2 = 2.0_rp * rhokp1 / delt
384 call amg_device_cheby_solve_part2(r_d, w_d, d_d, x_d, tmp1, tmp2, n)
396 integer,
intent(in) :: n
397 integer,
intent(in) :: lvl
398 integer,
intent(in) :: max_iter
405 this%max_iter = max_iter
413 if (
allocated(this%d))
then
416 if (
allocated(this%w))
then
419 if (
allocated(this%r))
then
429 type(tamg_hierarchy_t),
intent(inout) :: amg
430 integer,
intent(in) :: n
434 call tamg_sample_matrix_val(val, amg, this%lvl, i, i)
435 this%d(i) = 1.0_rp / val
437 this%recompute_diag = .false.
447 integer,
intent(in) :: n
448 real(kind=rp),
dimension(n),
intent(inout) :: x
449 real(kind=rp),
dimension(n),
intent(inout) :: f
450 class(tamg_hierarchy_t),
intent(inout) :: amg
451 type(ksp_monitor_t) :: ksp_results
452 integer,
optional,
intent(in) :: niter
453 integer :: iter, max_iter
454 real(kind=rp) :: rtr, rnorm
457 if (this%recompute_diag)
then
458 call this%comp_diag(amg, n)
461 if (
present(niter))
then
464 max_iter = this%max_iter
468 associate( w => this%w, r => this%r, d => this%d)
469 do iter = 1, max_iter
472 call amg%matvec(w, x, this%lvl)
479 call add2s2(x, r, this%omega, n)
485 integer,
intent(in) :: lvl
487 character(len=LOG_SIZE) :: log_buf
489 write(log_buf,
'(A8,I2,A28)')
'-- level', lvl,
'-- init smoother: Chebyshev'
490 call neko_log%message(log_buf)
491 write(log_buf,
'(A22,I6)')
'Iterations:', smoo%max_iter
492 call neko_log%message(log_buf)
496 integer,
intent(in) :: lvl
497 real(kind=rp),
intent(in) :: lam
498 character(len=LOG_SIZE) :: log_buf
500 write(log_buf,
'(A12,I2,A29,F12.3)')
'-- AMG level', lvl, &
501 '-- Chebyshev approx. max eig', lam
502 call neko_log%message(log_buf)
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Deassociate a Fortran array from a device pointer.
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n, strm)
Returns .
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_sub2(a_d, b_d, n, strm)
Vector substraction .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
real(kind=rp) function, public device_glsc2(a_d, b_d, n, strm)
Weighted inner product .
subroutine, public device_cmult2(a_d, b_d, c, n, strm)
Multiplication by constant c .
Implements device kernels for use with TreeAMG smoothers.
subroutine, public amg_device_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, inv_thet, n, zero_initial)
subroutine, public amg_device_cheby_solve_part2(r_d, w_d, d_d, x_d, tmp1, tmp2, n)
Device abstraction, common interface for various accelerators.
integer, parameter, public host_to_device
subroutine, public device_free(x_d)
Deallocate memory on the device.
Implements the base abstract type for Krylov solvers plus helper types.
type(log_t), public neko_log
Global log stream.
integer, parameter, public log_size
subroutine, public cmult(a, c, n)
Multiplication by constant c .
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
real(kind=rp) function, public glsc2(a, b, n)
Weighted inner product .
subroutine, public add2(a, b, n)
Vector addition .
subroutine, public add3s2(a, b, c, c1, c2, n)
Returns .
subroutine, public col2(a, b, n)
Vector multiplication .
subroutine, public copy(a, b, n)
Copy a vector .
subroutine, public sub2(a, b, n)
Vector substraction .
subroutine, public add2s2(a, b, c1, n)
Vector addition with scalar multiplication (multiplication on second argument)
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Implements smoothers for use with TreeAMG matrix vector product.
subroutine amg_cheby_free(this)
free cheby data
subroutine amg_jacobi_init(this, n, lvl, max_iter)
Initialization of Jacobi (this is expensive...)
subroutine amg_cheby_power(this, amg, n)
Power method to approximate largest eigenvalue.
subroutine amg_device_cheby_power(this, amg, n)
Power method to approximate largest eigenvalue.
subroutine amg_cheby_solve(this, x, f, n, amg, zero_init)
Chebyshev smoother From Saad's iterative methods textbook.
subroutine amg_cheby_init(this, n, lvl, max_iter)
Initialization of chebyshev.
subroutine amg_jacobi_free(this)
free jacobi data
subroutine amg_jacobi_diag(this, amg, n)
SAMPLE MATRIX DIAGONAL VALUES (DO NOT USE, EXPENSIVE)
subroutine amg_device_cheby_solve(this, x, f, x_d, f_d, n, amg, zero_init)
Chebyshev smoother From Saad's iterative methods textbook.
subroutine amg_jacobi_solve(this, x, f, n, amg, niter)
Jacobi smoother.
subroutine amg_smoo_monitor(lvl, smoo)
subroutine amg_cheby_monitor(lvl, lam)
Implements utilities for the TreeAMG hierarchy structure.
subroutine, public tamg_sample_matrix_val(val, amg, lvl, i, j)
Sample the values in a matix (expensive, use with caution)
Implements the base type for TreeAMG hierarchy structure.
A list of allocatable `bc_t`. Follows the standard interface of lists.
Type for storing initial and final residuals in a Krylov solver.
Type for a TreeAMG hierarchy.
Type for Chebyshev iteration using TreeAMG matvec.
Type for Chebyshev iteration using TreeAMG matvec.