49 use,
intrinsic :: iso_c_binding
55 real(kind=
rp),
allocatable :: d(:)
56 real(kind=
rp),
allocatable :: w(:)
57 real(kind=
rp),
allocatable :: r(:)
58 real(kind=
rp) :: omega
61 integer :: max_iter = 10
62 logical :: recompute_diag = .true.
71 real(kind=
rp),
allocatable :: d(:)
72 type(c_ptr) :: d_d = c_null_ptr
73 real(kind=
rp),
allocatable :: w(:)
74 type(c_ptr) :: w_d = c_null_ptr
75 real(kind=
rp),
allocatable :: r(:)
76 type(c_ptr) :: r_d = c_null_ptr
77 real(kind=
rp) :: tha, dlt
80 integer :: power_its = 250
81 integer :: max_iter = 10
82 logical :: recompute_eigs = .true.
99 integer,
intent(in) :: n
100 integer,
intent(in) :: lvl
101 integer,
intent(in) :: max_iter
113 this%max_iter = max_iter
114 this%recompute_eigs = .true.
127 integer,
intent(in) :: n
128 real(kind=
rp) :: lam, b, a, rn
129 real(kind=
rp),
parameter :: boost = 1.1_rp
130 real(kind=
rp),
parameter :: lam_factor = 30.0_rp
131 real(kind=
rp) :: wtw, dtw, dtd
133 associate(w => this%w, d => this%d, coef => amg%coef, gs_h => amg%gs_h, &
134 msh => amg%msh, xh => amg%Xh, blst => amg%blst)
141 if (this%lvl .eq. 0)
then
142 call gs_h%op(d, n, gs_op_add)
143 call blst%apply(d, n)
146 do i = 1, this%power_its
147 call amg%matvec(w, d, this%lvl)
149 if (this%lvl .eq. 0)
then
150 wtw =
glsc3(w, coef%mult, w, n)
155 call cmult2(d, w, 1.0_rp/sqrt(wtw), n)
158 call amg%matvec(w, d, this%lvl)
160 if (this%lvl .eq. 0)
then
161 dtw =
glsc3(d, coef%mult, w, n)
162 dtd =
glsc3(d, coef%mult, d, n)
170 this%tha = (b+a)/2.0_rp
171 this%dlt = (b-a)/2.0_rp
173 this%recompute_eigs = .false.
186 integer,
intent(in) :: n
187 real(kind=rp),
dimension(n),
intent(inout) :: x
188 real(kind=rp),
dimension(n),
intent(inout) :: f
189 class(tamg_hierarchy_t),
intent(inout) :: amg
190 type(ksp_monitor_t) :: ksp_results
191 logical,
optional,
intent(in) :: zero_init
192 integer :: iter, max_iter
193 real(kind=rp) :: rtr, rnorm
194 real(kind=rp) :: rhok, rhokp1, s1, thet, delt, tmp1, tmp2
195 logical :: zero_initial_guess
197 if (this%recompute_eigs)
then
198 call this%comp_eig(amg, n)
200 if (
present(zero_init))
then
201 zero_initial_guess = zero_init
203 zero_initial_guess = .false.
205 max_iter = this%max_iter
207 associate( w => this%w, r => this%r, d => this%d, blst => amg%blst)
209 if (.not. zero_initial_guess)
then
210 call amg%matvec(w, x, this%lvl)
220 call cmult2(d, r, 1.0_rp/thet, n)
224 do iter = 2, max_iter
225 call amg%matvec(w, d, this%lvl)
228 rhokp1 = 1.0_rp / (2.0_rp * s1 - rhok)
230 tmp2 = 2.0_rp * rhokp1 / delt
233 call add3s2(d, d, r, tmp1, tmp2, n)
244 type(tamg_hierarchy_t),
intent(inout) :: amg
245 integer,
intent(in) :: n
246 real(kind=rp) :: lam, b, a, rn
247 real(kind=rp),
parameter :: boost = 1.1_rp
248 real(kind=rp),
parameter :: lam_factor = 30.0_rp
249 real(kind=rp) :: wtw, dtw, dtd
251 associate(w => this%w, d => this%d, coef => amg%coef, gs_h => amg%gs_h, &
252 msh => amg%msh, xh => amg%Xh, blst => amg%blst)
257 call device_memcpy(this%d, this%d_d, n, host_to_device, .true.)
258 if (this%lvl .eq. 0)
then
259 call gs_h%op(d, n, gs_op_add)
260 call blst%apply(d, n)
262 do i = 1, this%power_its
263 call amg%device_matvec(w, d, this%w_d, this%d_d, this%lvl)
265 if (this%lvl .eq. 0)
then
266 wtw = device_glsc3(this%w_d, coef%mult_d, this%w_d, n)
268 wtw = device_glsc2(this%w_d, this%w_d, n)
271 call device_cmult2(this%d_d, this%w_d, 1.0_rp/sqrt(wtw), n)
274 call amg%device_matvec(w, d, this%w_d, this%d_d, this%lvl)
276 if (this%lvl .eq. 0)
then
277 dtw = device_glsc3(this%d_d, coef%mult_d, this%w_d, n)
278 dtd = device_glsc3(this%d_d, coef%mult_d, this%d_d, n)
280 dtw = device_glsc2(this%d_d, this%w_d, n)
281 dtd = device_glsc2(this%d_d, this%d_d, n)
286 this%tha = (b+a)/2.0_rp
287 this%dlt = (b-a)/2.0_rp
289 this%recompute_eigs = .false.
302 integer,
intent(in) :: n
303 real(kind=rp),
dimension(n),
intent(inout) :: x
304 real(kind=rp),
dimension(n),
intent(inout) :: f
307 class(tamg_hierarchy_t),
intent(inout) :: amg
308 type(ksp_monitor_t) :: ksp_results
309 logical,
optional,
intent(in) :: zero_init
310 integer :: iter, max_iter
311 real(kind=rp) :: rtr, rnorm
312 real(kind=rp) :: rhok, rhokp1, s1, thet, delt, tmp1, tmp2
313 logical :: zero_initial_guess
315 if (this%recompute_eigs)
then
316 call this%device_comp_eig(amg, n)
318 if (
present(zero_init))
then
319 zero_initial_guess = zero_init
321 zero_initial_guess = .false.
323 max_iter = this%max_iter
325 associate( w_d => this%w_d, r_d => this%r_d, d_d => this%d_d, &
327 call device_copy(r_d, f_d, n)
328 if (.not. zero_initial_guess)
then
329 call amg%device_matvec(this%w, x, w_d, x_d, this%lvl)
330 call device_sub2(r_d, w_d, n)
339 call device_cmult2(d_d, r_d, 1.0_rp/thet, n)
340 call device_add2(x_d, d_d, n)
342 do iter = 2, max_iter
343 call amg%device_matvec(this%w, this%d, w_d, d_d, this%lvl)
344 call device_sub2(r_d, w_d, n)
346 rhokp1 = 1.0_rp / (2.0_rp * s1 - rhok)
348 tmp2 = 2.0_rp * rhokp1 / delt
351 call device_add3s2(d_d, d_d, r_d, tmp1, tmp2, n)
352 call device_add2(x_d, d_d, n)
363 integer,
intent(in) :: n
364 integer,
intent(in) :: lvl
365 integer,
intent(in) :: max_iter
372 this%max_iter = max_iter
382 type(tamg_hierarchy_t),
intent(inout) :: amg
383 integer,
intent(in) :: n
387 call tamg_sample_matrix_val(val, amg, this%lvl, i, i)
388 this%d(i) = 1.0_rp / val
390 this%recompute_diag = .false.
400 integer,
intent(in) :: n
401 real(kind=rp),
dimension(n),
intent(inout) :: x
402 real(kind=rp),
dimension(n),
intent(inout) :: f
403 class(tamg_hierarchy_t),
intent(inout) :: amg
404 type(ksp_monitor_t) :: ksp_results
405 integer,
optional,
intent(in) :: niter
406 integer :: iter, max_iter
407 real(kind=rp) :: rtr, rnorm
410 if (this%recompute_diag)
then
411 call this%comp_diag(amg, n)
414 if (
present(niter))
then
417 max_iter = this%max_iter
421 associate( w => this%w, r => this%r, d => this%d)
422 do iter = 1, max_iter
425 call amg%matvec(w, x, this%lvl)
432 call add2s2(x, r, this%omega, n)
438 integer,
intent(in) :: lvl
440 character(len=LOG_SIZE) :: log_buf
442 write(log_buf,
'(A8,I2,A28)')
'-- level', lvl,
'-- init smoother: Chebyshev'
443 call neko_log%message(log_buf)
444 write(log_buf,
'(A22,I6)')
'Iterations:', smoo%max_iter
445 call neko_log%message(log_buf)
449 integer,
intent(in) :: lvl
450 real(kind=rp),
intent(in) :: lam
451 character(len=LOG_SIZE) :: log_buf
453 write(log_buf,
'(A12,I2,A29,F12.3)')
'-- AMG level', lvl, &
454 '-- Chebyshev approx. max eig', lam
455 call neko_log%message(log_buf)
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n, strm)
Returns .
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_sub2(a_d, b_d, n, strm)
Vector substraction .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
real(kind=rp) function, public device_glsc2(a_d, b_d, n, strm)
Weighted inner product .
subroutine, public device_cmult2(a_d, b_d, c, n, strm)
Multiplication by constant c .
Device abstraction, common interface for various accelerators.
integer, parameter, public host_to_device
subroutine, public device_free(x_d)
Deallocate memory on the device.
Implements the base abstract type for Krylov solvers plus helper types.
type(log_t), public neko_log
Global log stream.
integer, parameter, public log_size
subroutine, public cmult(a, c, n)
Multiplication by constant c .
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
real(kind=rp) function, public glsc2(a, b, n)
Weighted inner product .
subroutine, public add2(a, b, n)
Vector addition .
subroutine, public add3s2(a, b, c, c1, c2, n)
Returns .
subroutine, public col2(a, b, n)
Vector multiplication .
subroutine, public copy(a, b, n)
Copy a vector .
subroutine, public sub2(a, b, n)
Vector substraction .
subroutine, public add2s2(a, b, c1, n)
Vector addition with scalar multiplication (multiplication on second argument)
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Implements smoothers for use with TreeAMG matrix vector product.
subroutine amg_jacobi_init(this, n, lvl, max_iter)
Initialization of Jacobi (this is expensive...)
subroutine amg_cheby_power(this, amg, n)
Power method to approximate largest eigenvalue.
subroutine amg_device_cheby_power(this, amg, n)
Power method to approximate largest eigenvalue.
subroutine amg_cheby_solve(this, x, f, n, amg, zero_init)
Chebyshev smoother From Saad's iterative methods textbook.
subroutine amg_cheby_init(this, n, lvl, max_iter)
Initialization of chebyshev.
subroutine amg_jacobi_diag(this, amg, n)
SAMPLE MATRIX DIAGONAL VALUES (DO NOT USE, EXPENSIVE)
subroutine amg_device_cheby_solve(this, x, f, x_d, f_d, n, amg, zero_init)
Chebyshev smoother From Saad's iterative methods textbook.
subroutine amg_jacobi_solve(this, x, f, n, amg, niter)
Jacobi smoother.
subroutine amg_smoo_monitor(lvl, smoo)
subroutine amg_cheby_monitor(lvl, lam)
Implements utilities for the TreeAMG hierarchy structure.
subroutine, public tamg_sample_matrix_val(val, amg, lvl, i, j)
Sample the values in a matix (expensive, use with caution)
Implements the base type for TreeAMG hierarchy structure.
A list of allocatable `bc_t`. Follows the standard interface of lists.
Type for storing initial and final residuals in a Krylov solver.
Type for a TreeAMG hierarchy.
Type for Chebyshev iteration using TreeAMG matvec.
Type for Chebyshev iteration using TreeAMG matvec.