49 use,
intrinsic :: iso_c_binding
55 real(kind=
rp),
allocatable :: d(:)
56 real(kind=
rp),
allocatable :: w(:)
57 real(kind=
rp),
allocatable :: r(:)
58 real(kind=
rp) :: omega
61 integer :: max_iter = 10
62 logical :: recompute_diag = .true.
71 real(kind=
rp),
allocatable :: d(:)
72 type(c_ptr) :: d_d = c_null_ptr
73 real(kind=
rp),
allocatable :: w(:)
74 type(c_ptr) :: w_d = c_null_ptr
75 real(kind=
rp),
allocatable :: r(:)
76 type(c_ptr) :: r_d = c_null_ptr
77 real(kind=
rp) :: tha, dlt
80 integer :: power_its = 250
81 integer :: max_iter = 10
82 logical :: recompute_eigs = .true.
99 integer,
intent(in) :: n
100 integer,
intent(in) :: lvl
101 integer,
intent(in) :: max_iter
113 this%max_iter = max_iter
114 this%recompute_eigs = .true.
127 integer,
intent(in) :: n
128 real(kind=
rp) :: lam, b, a, rn
129 real(kind=
rp),
parameter :: boost = 1.1_rp
130 real(kind=
rp),
parameter :: lam_factor = 30.0_rp
131 real(kind=
rp) :: wtw, dtw, dtd
133 associate(w => this%w, d => this%d, coef => amg%coef, gs_h => amg%gs_h, &
134 msh=>amg%msh, xh=>amg%Xh, blst=>amg%blst)
142 if (this%lvl .eq. 0)
then
143 call gs_h%op(d, n, gs_op_add)
144 call blst%apply(d, n)
148 do i = 1, this%power_its
150 call amg%matvec(w, d, this%lvl)
152 if (this%lvl .eq. 0)
then
154 wtw =
glsc3(w, coef%mult, w, n)
159 call cmult2(d, w, 1.0_rp/sqrt(wtw), n)
161 if (this%lvl .eq. 0)
then
167 call amg%matvec(w, d, this%lvl)
168 if (this%lvl .eq. 0)
then
172 if (this%lvl .eq. 0)
then
173 dtw =
glsc3(d, coef%mult, w, n)
174 dtd =
glsc3(d, coef%mult, d, n)
182 this%tha = (b+a)/2.0_rp
183 this%dlt = (b-a)/2.0_rp
185 this%recompute_eigs = .false.
199 integer,
intent(in) :: n
200 real(kind=rp),
dimension(n),
intent(inout) :: x
201 real(kind=rp),
dimension(n),
intent(inout) :: f
202 class(tamg_hierarchy_t),
intent(inout) :: amg
203 type(ksp_monitor_t) :: ksp_results
204 integer,
optional,
intent(in) :: niter
205 integer :: iter, max_iter
206 real(kind=rp) :: rtr, rnorm
207 real(kind=rp) :: rhok, rhokp1, s1, thet, delt, tmp1, tmp2
209 if (this%recompute_eigs)
then
210 call this%comp_eig(amg, n)
213 if (
present(niter))
then
216 max_iter = this%max_iter
219 associate( w => this%w, r => this%r, d => this%d, blst=>amg%blst)
222 call amg%matvec(w, x, this%lvl)
231 call cmult(d, 1.0_rp/thet, n)
233 do iter = 1, max_iter
237 call amg%matvec(w, d, this%lvl)
240 rhokp1 = 1.0_rp / (2.0_rp * s1 - rhok)
242 tmp2 = 2.0_rp * rhokp1 / delt
245 call cmult(d, tmp1, n)
246 call add2s2(d, r, tmp2, n)
257 type(tamg_hierarchy_t),
intent(inout) :: amg
258 integer,
intent(in) :: n
259 real(kind=rp) :: lam, b, a, rn
260 real(kind=rp),
parameter :: boost = 1.1_rp
261 real(kind=rp),
parameter :: lam_factor = 30.0_rp
262 real(kind=rp) :: wtw, dtw, dtd
264 associate(w => this%w, d => this%d, coef => amg%coef, gs_h => amg%gs_h, &
265 msh=>amg%msh, xh=>amg%Xh, blst=>amg%blst)
270 call device_memcpy(this%d, this%d_d, n, host_to_device, .true.)
271 if (this%lvl .eq. 0)
then
272 call gs_h%op(d, n, gs_op_add)
273 call blst%apply(d, n)
275 do i = 1, this%power_its
276 call device_rzero(this%w_d, n)
277 call amg%device_matvec(w, d, this%w_d, this%d_d, this%lvl)
278 if (this%lvl .eq. 0)
then
279 wtw = device_glsc3(this%w_d, coef%mult_d, this%w_d, n)
281 wtw = device_glsc2(this%w_d, this%w_d, n)
283 call device_cmult2(this%d_d, this%w_d, 1.0_rp/sqrt(wtw), n)
285 call device_rzero(this%w_d, n)
286 call amg%device_matvec(w, d, this%w_d, this%d_d, this%lvl)
287 if (this%lvl .eq. 0)
then
288 dtw = device_glsc3(this%d_d, coef%mult_d, this%w_d, n)
289 dtd = device_glsc3(this%d_d, coef%mult_d, this%d_d, n)
291 dtw = device_glsc2(this%d_d, this%w_d, n)
292 dtd = device_glsc2(this%d_d, this%d_d, n)
297 this%tha = (b+a)/2.0_rp
298 this%dlt = (b-a)/2.0_rp
299 this%recompute_eigs = .false.
312 integer,
intent(in) :: n
313 real(kind=rp),
dimension(n),
intent(inout) :: x
314 real(kind=rp),
dimension(n),
intent(inout) :: f
317 class(tamg_hierarchy_t),
intent(inout) :: amg
318 type(ksp_monitor_t) :: ksp_results
319 integer,
optional,
intent(in) :: niter
320 integer :: iter, max_iter
321 real(kind=rp) :: rtr, rnorm
322 real(kind=rp) :: rhok, rhokp1, s1, thet, delt, tmp1, tmp2
323 if (this%recompute_eigs)
then
324 call this%device_comp_eig(amg, n)
326 if (
present(niter))
then
329 max_iter = this%max_iter
331 associate( w_d => this%w_d, r_d => this%r_d, d_d => this%d_d, blst=>amg%blst)
332 call device_copy(r_d, f_d, n)
333 call amg%device_matvec(this%w, x, w_d, x_d, this%lvl)
334 call device_sub2(r_d, w_d, n)
339 call device_cmult2(d_d, r_d, 1.0_rp/thet, n)
340 do iter = 1, max_iter
341 call device_add2(x_d,d_d,n)
342 call amg%device_matvec(this%w, this%d, w_d, d_d, this%lvl)
343 call device_sub2(r_d, w_d, n)
344 rhokp1 = 1.0_rp / (2.0_rp * s1 - rhok)
346 tmp2 = 2.0_rp * rhokp1 / delt
348 call device_add3s2(d_d, d_d, r_d, tmp1, tmp2, n)
359 integer,
intent(in) :: n
360 integer,
intent(in) :: lvl
361 integer,
intent(in) :: max_iter
368 this%max_iter = max_iter
378 type(tamg_hierarchy_t),
intent(inout) :: amg
379 integer,
intent(in) :: n
383 call tamg_sample_matrix_val(val, amg, this%lvl, i, i)
384 this%d(i) = 1.0_rp / val
386 this%recompute_diag = .false.
396 integer,
intent(in) :: n
397 real(kind=rp),
dimension(n),
intent(inout) :: x
398 real(kind=rp),
dimension(n),
intent(inout) :: f
399 class(tamg_hierarchy_t),
intent(inout) :: amg
400 type(ksp_monitor_t) :: ksp_results
401 integer,
optional,
intent(in) :: niter
402 integer :: iter, max_iter
403 real(kind=rp) :: rtr, rnorm
406 if (this%recompute_diag)
then
407 call this%comp_diag(amg, n)
410 if (
present(niter))
then
413 max_iter = this%max_iter
417 associate( w => this%w, r => this%r, d => this%d)
418 do iter = 1, max_iter
421 call amg%matvec(w, x, this%lvl)
429 call add2s2(x, r, this%omega, n)
435 integer,
intent(in) :: lvl
437 character(len=LOG_SIZE) :: log_buf
439 write(log_buf,
'(A8,I2,A28)')
'-- level',lvl,
'-- init smoother: Chebyshev'
440 call neko_log%message(log_buf)
441 write(log_buf,
'(A22,I6)')
'Iterations:',smoo%max_iter
442 call neko_log%message(log_buf)
446 integer,
intent(in) :: lvl
447 real(kind=rp),
intent(in) :: lam
448 character(len=LOG_SIZE) :: log_buf
450 write(log_buf,
'(A12,I2,A29,F12.3)')
'-- AMG level',lvl,
'-- Chebyshev approx. max eig', lam
451 call neko_log%message(log_buf)
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
subroutine, public device_add2(a_d, b_d, n)
Vector addition .
subroutine, public device_rzero(a_d, n)
Zero a real vector.
subroutine, public device_cmult2(a_d, b_d, c, n)
Multiplication by constant c .
real(kind=rp) function, public device_glsc2(a_d, b_d, n)
Weighted inner product .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n)
Returns .
subroutine, public device_sub2(a_d, b_d, n)
Vector substraction .
Device abstraction, common interface for various accelerators.
integer, parameter, public host_to_device
subroutine, public device_free(x_d)
Deallocate memory on the device.
Implements the base abstract type for Krylov solvers plus helper types.
type(log_t), public neko_log
Global log stream.
integer, parameter, public log_size
subroutine, public cmult(a, c, n)
Multiplication by constant c .
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
real(kind=rp) function, public glsc2(a, b, n)
Weighted inner product .
subroutine, public add2(a, b, n)
Vector addition .
subroutine, public col2(a, b, n)
Vector multiplication .
subroutine, public copy(a, b, n)
Copy a vector .
subroutine, public sub2(a, b, n)
Vector substraction .
subroutine, public add2s2(a, b, c1, n)
Vector addition with scalar multiplication (multiplication on second argument)
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Implements smoothers for use with TreeAMG matrix vector product.
subroutine amg_jacobi_init(this, n, lvl, max_iter)
Initialization of Jacobi (this is expensive...)
subroutine amg_cheby_power(this, amg, n)
Power method to approximate largest eigenvalue.
subroutine amg_device_cheby_power(this, amg, n)
Power method to approximate largest eigenvalue.
subroutine amg_cheby_init(this, n, lvl, max_iter)
Initialization of chebyshev.
subroutine amg_jacobi_diag(this, amg, n)
SAMPLE MATRIX DIAGONAL VALUES (DO NOT USE, EXPENSIVE)
subroutine amg_jacobi_solve(this, x, f, n, amg, niter)
Jacobi smoother.
subroutine amg_smoo_monitor(lvl, smoo)
subroutine amg_device_cheby_solve(this, x, f, x_d, f_d, n, amg, niter)
Chebyshev smoother From Saad's iterative methods textbook.
subroutine amg_cheby_solve(this, x, f, n, amg, niter)
Chebyshev smoother From Saad's iterative methods textbook.
subroutine amg_cheby_monitor(lvl, lam)
Implements utilities for the TreeAMG hierarchy structure.
subroutine, public tamg_sample_matrix_val(val, amg, lvl, i, j)
Sample the values in a matix (expensive, use with caution)
Implements the base type for TreeAMG hierarchy structure.
A list of allocatable `bc_t`. Follows the standard interface of lists.
Type for storing initial and final residuals in a Krylov solver.
Type for a TreeAMG hierarchy.
Type for Chebyshev iteration using TreeAMG matvec.
Type for Chebyshev iteration using TreeAMG matvec.