Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
tree_amg_smoother.f90
Go to the documentation of this file.
1! Copyright (c) 2024-2025, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use tree_amg, only : tamg_hierarchy_t
37 use num_types, only : rp
38 use math, only : col2, add2, add2s2, glsc2, glsc3, sub2, cmult, &
43 use krylov, only : ksp_monitor_t
44 use bc_list, only: bc_list_t
45 use gather_scatter, only : gs_t, gs_op_add
46 use logger, only : neko_log, log_size
52 use, intrinsic :: iso_c_binding
53 implicit none
54 private
55
57 type, public :: amg_jacobi_t
58 real(kind=rp), allocatable :: d(:)
59 real(kind=rp), allocatable :: w(:)
60 real(kind=rp), allocatable :: r(:)
61 real(kind=rp) :: omega
62 integer :: lvl
63 integer :: n
64 integer :: max_iter = 10
65 logical :: recompute_diag = .true.
66 contains
67 procedure, pass(this) :: init => amg_jacobi_init
68 procedure, pass(this) :: solve => amg_jacobi_solve
69 procedure, pass(this) :: comp_diag => amg_jacobi_diag
70 procedure, pass(this) :: free => amg_jacobi_free
71 end type amg_jacobi_t
72
74 type, public :: amg_cheby_t
75 real(kind=rp), allocatable :: d(:)
76 type(c_ptr) :: d_d = c_null_ptr
77 real(kind=rp), allocatable :: w(:)
78 type(c_ptr) :: w_d = c_null_ptr
79 real(kind=rp), allocatable :: r(:)
80 type(c_ptr) :: r_d = c_null_ptr
81 real(kind=rp) :: tha, dlt
82 integer :: lvl
83 integer :: n
84 integer :: power_its = 250
85 integer :: max_iter = 10
86 logical :: recompute_eigs = .true.
87 contains
88 procedure, pass(this) :: init => amg_cheby_init
89 procedure, pass(this) :: solve => amg_cheby_solve
90 procedure, pass(this) :: comp_eig => amg_cheby_power
91 procedure, pass(this) :: device_solve => amg_device_cheby_solve
92 procedure, pass(this) :: device_comp_eig => amg_device_cheby_power
93 procedure, pass(this) :: free => amg_cheby_free
94 end type amg_cheby_t
95
96contains
97
102 subroutine amg_cheby_init(this, n, lvl, max_iter)
103 class(amg_cheby_t), intent(inout), target :: this
104 integer, intent(in) :: n
105 integer, intent(in) :: lvl
106 integer, intent(in) :: max_iter
107
108 allocate(this%d(n))
109 allocate(this%w(n))
110 allocate(this%r(n))
111 if (neko_bcknd_device .eq. 1) then
112 call device_map(this%d, this%d_d, n)
113 call device_map(this%w, this%w_d, n)
114 call device_map(this%r, this%r_d, n)
115 end if
116 this%n = n
117 this%lvl = lvl
118 this%max_iter = max_iter
119 this%recompute_eigs = .true.
120
121 call amg_smoo_monitor(lvl, this)
122
123 end subroutine amg_cheby_init
124
126 subroutine amg_cheby_free(this)
127 class(amg_cheby_t), intent(inout), target :: this
128 if (neko_bcknd_device .eq. 1) then
129 call device_deassociate(this%d)
130 call device_deassociate(this%w)
131 call device_deassociate(this%r)
132 end if
133 if (allocated(this%d)) then
134 deallocate(this%d)
135 end if
136 if (allocated(this%w)) then
137 deallocate(this%w)
138 end if
139 if (allocated(this%r)) then
140 deallocate(this%r)
141 end if
142 if (neko_bcknd_device .eq. 1) then
143 call device_free(this%d_d)
144 call device_free(this%w_d)
145 call device_free(this%r_d)
146 end if
147 end subroutine amg_cheby_free
148
149
153 subroutine amg_cheby_power(this, amg, n)
154 class(amg_cheby_t), intent(inout) :: this
155 type(tamg_hierarchy_t), intent(inout) :: amg
156 integer, intent(in) :: n
157 real(kind=rp) :: lam, b, a, rn
158 real(kind=rp), parameter :: boost = 1.1_rp
159 real(kind=rp), parameter :: lam_factor = 30.0_rp
160 real(kind=rp) :: wtw, dtw, dtd
161 integer :: i
162 associate(w => this%w, d => this%d, coef => amg%coef, gs_h => amg%gs_h, &
163 msh => amg%msh, xh => amg%Xh, blst => amg%blst)
164
165 do i = 1, n
166 !call random_number(rn)
167 !d(i) = rn + 10.0_rp
168 d(i) = sin(real(i))
169 end do
170 if (this%lvl .eq. 0) then
171 call gs_h%op(d, n, gs_op_add)!TODO
172 call blst%apply(d, n)
173 end if
174 !Power method to get lamba max
175 do i = 1, this%power_its
176 call amg%matvec(w, d, this%lvl)
177
178 if (this%lvl .eq. 0) then
179 wtw = glsc3(w, coef%mult, w, n)
180 else
181 wtw = glsc2(w, w, n)
182 end if
183
184 call cmult2(d, w, 1.0_rp/sqrt(wtw), n)
185 end do
186
187 call amg%matvec(w, d, this%lvl)
188
189 if (this%lvl .eq. 0) then
190 dtw = glsc3(d, coef%mult, w, n)
191 dtd = glsc3(d, coef%mult, d, n)
192 else
193 dtw = glsc2(d, w, n)
194 dtd = glsc2(d, d, n)
195 end if
196 lam = dtw / dtd
197 b = lam * boost
198 a = lam / lam_factor
199 this%tha = (b+a)/2.0_rp
200 this%dlt = (b-a)/2.0_rp
201
202 this%recompute_eigs = .false.
203 call amg_cheby_monitor(this%lvl, lam)
204 end associate
205 end subroutine amg_cheby_power
206
213 subroutine amg_cheby_solve(this, x, f, n, amg, zero_init)
214 class(amg_cheby_t), intent(inout) :: this
215 integer, intent(in) :: n
216 real(kind=rp), dimension(n), intent(inout) :: x
217 real(kind=rp), dimension(n), intent(inout) :: f
218 class(tamg_hierarchy_t), intent(inout) :: amg
219 type(ksp_monitor_t) :: ksp_results
220 logical, optional, intent(in) :: zero_init
221 integer :: iter, max_iter, i
222 real(kind=rp) :: rtr, rnorm
223 real(kind=rp) :: rhok, rhokp1, s1, thet, delt, tmp1, tmp2
224 logical :: zero_initial_guess
225
226 if (this%recompute_eigs) then
227 call this%comp_eig(amg, n)
228 end if
229 if (present(zero_init)) then
230 zero_initial_guess = zero_init
231 else
232 zero_initial_guess = .false.
233 end if
234 max_iter = this%max_iter
235
236 associate( w => this%w, r => this%r, d => this%d, blst => amg%blst)
237 call copy(r, f, n)
238 if (.not. zero_initial_guess) then
239 call amg%matvec(w, x, this%lvl)
240 call sub2(r, w, n)
241 end if
242
243 thet = this%tha
244 delt = this%dlt
245 s1 = thet / delt
246 rhok = 1.0_rp / s1
247
248 ! First iteration
249 do concurrent(i = 1:n)
250 d(i) = 1.0_rp/thet * r(i)
251 x(i) = x(i) + d(i)
252 end do
253
254 ! Rest of iterations
255 do iter = 2, max_iter
256 call amg%matvec(w, d, this%lvl)
257
258 rhokp1 = 1.0_rp / (2.0_rp * s1 - rhok)
259 tmp1 = rhokp1 * rhok
260 tmp2 = 2.0_rp * rhokp1 / delt
261 rhok = rhokp1
262
263 do concurrent(i = 1:n)
264 r(i) = r(i) - w(i)
265 d(i) = tmp1 * d(i) + tmp2 * r(i)
266 x(i) = x(i) + d(i)
267 end do
268
269 end do
270 end associate
271 end subroutine amg_cheby_solve
272
276 subroutine amg_device_cheby_power(this, amg, n)
277 class(amg_cheby_t), intent(inout) :: this
278 type(tamg_hierarchy_t), intent(inout) :: amg
279 integer, intent(in) :: n
280 real(kind=rp) :: lam, b, a, rn
281 real(kind=rp), parameter :: boost = 1.1_rp
282 real(kind=rp), parameter :: lam_factor = 30.0_rp
283 real(kind=rp) :: wtw, dtw, dtd
284 integer :: i
285 associate(w => this%w, d => this%d, coef => amg%coef, gs_h => amg%gs_h, &
286 msh => amg%msh, xh => amg%Xh, blst => amg%blst)
287 do i = 1, n
288 !TODO: replace with a better way to initialize power method
289 d(i) = sin(real(i))
290 end do
291 call device_memcpy(this%d, this%d_d, n, host_to_device, .true.)
292 if (this%lvl .eq. 0) then
293 call gs_h%op(d, n, gs_op_add)!TODO
294 call blst%apply(d, n)
295 end if
296 do i = 1, this%power_its
297 call amg%device_matvec(w, d, this%w_d, this%d_d, this%lvl)
298
299 if (this%lvl .eq. 0) then
300 wtw = device_glsc3(this%w_d, coef%mult_d, this%w_d, n)
301 else
302 wtw = device_glsc2(this%w_d, this%w_d, n)
303 end if
304
305 call device_cmult2(this%d_d, this%w_d, 1.0_rp/sqrt(wtw), n)
306 end do
307
308 call amg%device_matvec(w, d, this%w_d, this%d_d, this%lvl)
309
310 if (this%lvl .eq. 0) then
311 dtw = device_glsc3(this%d_d, coef%mult_d, this%w_d, n)
312 dtd = device_glsc3(this%d_d, coef%mult_d, this%d_d, n)
313 else
314 dtw = device_glsc2(this%d_d, this%w_d, n)
315 dtd = device_glsc2(this%d_d, this%d_d, n)
316 end if
317 lam = dtw / dtd
318 b = lam * boost
319 a = lam / lam_factor
320 this%tha = (b+a)/2.0_rp
321 this%dlt = (b-a)/2.0_rp
322
323 this%recompute_eigs = .false.
324 call amg_cheby_monitor(this%lvl, lam)
325 end associate
326 end subroutine amg_device_cheby_power
327
334 subroutine amg_device_cheby_solve(this, x, f, x_d, f_d, n, amg, zero_init)
335 class(amg_cheby_t), intent(inout) :: this
336 integer, intent(in) :: n
337 real(kind=rp), dimension(n), intent(inout) :: x
338 real(kind=rp), dimension(n), intent(inout) :: f
339 type(c_ptr) :: x_d
340 type(c_ptr) :: f_d
341 class(tamg_hierarchy_t), intent(inout) :: amg
342 type(ksp_monitor_t) :: ksp_results
343 logical, optional, intent(in) :: zero_init
344 integer :: iter, max_iter
345 real(kind=rp) :: rtr, rnorm
346 real(kind=rp) :: rhok, rhokp1, s1, thet, delt, tmp1, tmp2
347 logical :: zero_initial_guess
348
349 if (this%recompute_eigs) then
350 call this%device_comp_eig(amg, n)
351 end if
352 if (present(zero_init)) then
353 zero_initial_guess = zero_init
354 else
355 zero_initial_guess = .false.
356 end if
357 max_iter = this%max_iter
358
359 associate( w_d => this%w_d, r_d => this%r_d, d_d => this%d_d, &
360 blst => amg%blst)
361
362 if (.not. zero_initial_guess) then
363 call amg%device_matvec(this%w, x, w_d, x_d, this%lvl)
364 end if
365
366 thet = this%tha
367 delt = this%dlt
368 s1 = thet / delt
369 rhok = 1.0_rp / s1
370
371 ! First iteration
372 tmp1 = 1.0_rp / thet
373 call amg_device_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, &
374 tmp1, n, zero_initial_guess)
375 ! Rest of iterations
376 do iter = 2, max_iter
377 call amg%device_matvec(this%w, this%d, w_d, d_d, this%lvl)
378
379 rhokp1 = 1.0_rp / (2.0_rp * s1 - rhok)
380 tmp1 = rhokp1 * rhok
381 tmp2 = 2.0_rp * rhokp1 / delt
382 rhok = rhokp1
383
384 call amg_device_cheby_solve_part2(r_d, w_d, d_d, x_d, tmp1, tmp2, n)
385
386 end do
387 end associate
388 end subroutine amg_device_cheby_solve
389
394 subroutine amg_jacobi_init(this, n, lvl, max_iter)
395 class(amg_jacobi_t), intent(inout), target :: this
396 integer, intent(in) :: n
397 integer, intent(in) :: lvl
398 integer, intent(in) :: max_iter
399
400 allocate(this%d(n))
401 allocate(this%w(n))
402 allocate(this%r(n))
403 this%n = n
404 this%lvl = lvl
405 this%max_iter = max_iter
406 this%omega = 0.7_rp
407
408 end subroutine amg_jacobi_init
409
411 subroutine amg_jacobi_free(this)
412 class(amg_jacobi_t), intent(inout), target :: this
413 if (allocated(this%d)) then
414 deallocate(this%d)
415 end if
416 if (allocated(this%w)) then
417 deallocate(this%w)
418 end if
419 if (allocated(this%r)) then
420 deallocate(this%r)
421 end if
422 end subroutine amg_jacobi_free
423
427 subroutine amg_jacobi_diag(this, amg, n)
428 class(amg_jacobi_t), intent(inout) :: this
429 type(tamg_hierarchy_t), intent(inout) :: amg
430 integer, intent(in) :: n
431 real(kind=rp) :: val
432 integer :: i
433 do i = 1, n
434 call tamg_sample_matrix_val(val, amg, this%lvl, i, i)
435 this%d(i) = 1.0_rp / val
436 end do
437 this%recompute_diag = .false.
438 end subroutine amg_jacobi_diag
439
445 subroutine amg_jacobi_solve(this, x, f, n, amg, niter)
446 class(amg_jacobi_t), intent(inout) :: this
447 integer, intent(in) :: n
448 real(kind=rp), dimension(n), intent(inout) :: x
449 real(kind=rp), dimension(n), intent(inout) :: f
450 class(tamg_hierarchy_t), intent(inout) :: amg
451 type(ksp_monitor_t) :: ksp_results
452 integer, optional, intent(in) :: niter
453 integer :: iter, max_iter
454 real(kind=rp) :: rtr, rnorm
455 integer :: i
456
457 if (this%recompute_diag) then
458 call this%comp_diag(amg, n)
459 end if
460
461 if (present(niter)) then
462 max_iter = niter
463 else
464 max_iter = this%max_iter
465 end if
466
467 ! x = x + omega * Dinv( f - Ax )
468 associate( w => this%w, r => this%r, d => this%d)
469 do iter = 1, max_iter
470 w = 0.0_rp
472 call amg%matvec(w, x, this%lvl)
474 call copy(r, f, n)
475 call sub2(r, w, n)
477 call col2(r, d, n)
479 call add2s2(x, r, this%omega, n)
480 end do
481 end associate
482 end subroutine amg_jacobi_solve
483
484 subroutine amg_smoo_monitor(lvl, smoo)
485 integer, intent(in) :: lvl
486 class(amg_cheby_t), intent(in) :: smoo
487 character(len=LOG_SIZE) :: log_buf
488
489 write(log_buf, '(A8,I2,A28)') '-- level', lvl, '-- init smoother: Chebyshev'
490 call neko_log%message(log_buf)
491 write(log_buf, '(A22,I6)') 'Iterations:', smoo%max_iter
492 call neko_log%message(log_buf)
493 end subroutine amg_smoo_monitor
494
495 subroutine amg_cheby_monitor(lvl, lam)
496 integer, intent(in) :: lvl
497 real(kind=rp), intent(in) :: lam
498 character(len=LOG_SIZE) :: log_buf
499
500 write(log_buf, '(A12,I2,A29,F12.3)') '-- AMG level', lvl, &
501 '-- Chebyshev approx. max eig', lam
502 call neko_log%message(log_buf)
503 end subroutine amg_cheby_monitor
504
505end module tree_amg_smoother
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
double real
Deassociate a Fortran array from a device pointer.
Definition device.F90:95
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Defines a list of bc_t.
Definition bc_list.f90:34
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n, strm)
Returns .
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_sub2(a_d, b_d, n, strm)
Vector substraction .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
real(kind=rp) function, public device_glsc2(a_d, b_d, n, strm)
Weighted inner product .
subroutine, public device_cmult2(a_d, b_d, c, n, strm)
Multiplication by constant c .
Implements device kernels for use with TreeAMG smoothers.
subroutine, public amg_device_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, inv_thet, n, zero_initial)
subroutine, public amg_device_cheby_solve_part2(r_d, w_d, d_d, x_d, tmp1, tmp2, n)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
Logging routines.
Definition log.f90:34
type(log_t), public neko_log
Global log stream.
Definition log.f90:76
integer, parameter, public log_size
Definition log.f90:46
Definition math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition math.f90:411
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
Definition math.f90:423
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1067
real(kind=rp) function, public glsc2(a, b, n)
Weighted inner product .
Definition math.f90:1048
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:726
subroutine, public add3s2(a, b, c, c1, c2, n)
Returns .
Definition math.f90:895
subroutine, public col2(a, b, n)
Vector multiplication .
Definition math.f90:854
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:249
subroutine, public sub2(a, b, n)
Vector substraction .
Definition math.f90:768
subroutine, public add2s2(a, b, c1, n)
Vector addition with scalar multiplication (multiplication on second argument)
Definition math.f90:812
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Implements smoothers for use with TreeAMG matrix vector product.
subroutine amg_cheby_free(this)
free cheby data
subroutine amg_jacobi_init(this, n, lvl, max_iter)
Initialization of Jacobi (this is expensive...)
subroutine amg_cheby_power(this, amg, n)
Power method to approximate largest eigenvalue.
subroutine amg_device_cheby_power(this, amg, n)
Power method to approximate largest eigenvalue.
subroutine amg_cheby_solve(this, x, f, n, amg, zero_init)
Chebyshev smoother From Saad's iterative methods textbook.
subroutine amg_cheby_init(this, n, lvl, max_iter)
Initialization of chebyshev.
subroutine amg_jacobi_free(this)
free jacobi data
subroutine amg_jacobi_diag(this, amg, n)
SAMPLE MATRIX DIAGONAL VALUES (DO NOT USE, EXPENSIVE)
subroutine amg_device_cheby_solve(this, x, f, x_d, f_d, n, amg, zero_init)
Chebyshev smoother From Saad's iterative methods textbook.
subroutine amg_jacobi_solve(this, x, f, n, amg, niter)
Jacobi smoother.
subroutine amg_smoo_monitor(lvl, smoo)
subroutine amg_cheby_monitor(lvl, lam)
Implements utilities for the TreeAMG hierarchy structure.
subroutine, public tamg_sample_matrix_val(val, amg, lvl, i, j)
Sample the values in a matix (expensive, use with caution)
Implements the base type for TreeAMG hierarchy structure.
Definition tree_amg.f90:34
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Type for a TreeAMG hierarchy.
Definition tree_amg.f90:87
Type for Chebyshev iteration using TreeAMG matvec.
Type for Chebyshev iteration using TreeAMG matvec.