Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
tree_amg_smoother.f90
Go to the documentation of this file.
1! Copyright (c) 2024-2025, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use tree_amg, only : tamg_hierarchy_t
37 use num_types, only : rp
38 use math, only : col2, add2, add2s2, glsc2, glsc3, sub2, cmult, &
43 use krylov, only : ksp_monitor_t
44 use bc_list, only: bc_list_t
45 use gather_scatter, only : gs_t, gs_op_add
46 use logger, only : neko_log, log_size
51 use, intrinsic :: iso_c_binding
52 implicit none
53 private
54
56 type, public :: amg_jacobi_t
57 real(kind=rp), allocatable :: d(:)
58 real(kind=rp), allocatable :: w(:)
59 real(kind=rp), allocatable :: r(:)
60 real(kind=rp) :: omega
61 integer :: lvl
62 integer :: n
63 integer :: max_iter = 10
64 logical :: recompute_diag = .true.
65 contains
66 procedure, pass(this) :: init => amg_jacobi_init
67 procedure, pass(this) :: solve => amg_jacobi_solve
68 procedure, pass(this) :: comp_diag => amg_jacobi_diag
69 end type amg_jacobi_t
70
72 type, public :: amg_cheby_t
73 real(kind=rp), allocatable :: d(:)
74 type(c_ptr) :: d_d = c_null_ptr
75 real(kind=rp), allocatable :: w(:)
76 type(c_ptr) :: w_d = c_null_ptr
77 real(kind=rp), allocatable :: r(:)
78 type(c_ptr) :: r_d = c_null_ptr
79 real(kind=rp) :: tha, dlt
80 integer :: lvl
81 integer :: n
82 integer :: power_its = 250
83 integer :: max_iter = 10
84 logical :: recompute_eigs = .true.
85 contains
86 procedure, pass(this) :: init => amg_cheby_init
87 procedure, pass(this) :: solve => amg_cheby_solve
88 procedure, pass(this) :: comp_eig => amg_cheby_power
89 procedure, pass(this) :: device_solve => amg_device_cheby_solve
90 procedure, pass(this) :: device_comp_eig => amg_device_cheby_power
91 end type amg_cheby_t
92
93contains
94
99 subroutine amg_cheby_init(this, n, lvl, max_iter)
100 class(amg_cheby_t), intent(inout), target :: this
101 integer, intent(in) :: n
102 integer, intent(in) :: lvl
103 integer, intent(in) :: max_iter
104
105 allocate(this%d(n))
106 allocate(this%w(n))
107 allocate(this%r(n))
108 if (neko_bcknd_device .eq. 1) then
109 call device_map(this%d, this%d_d, n)
110 call device_map(this%w, this%w_d, n)
111 call device_map(this%r, this%r_d, n)
112 end if
113 this%n = n
114 this%lvl = lvl
115 this%max_iter = max_iter
116 this%recompute_eigs = .true.
117
118 call amg_smoo_monitor(lvl, this)
119
120 end subroutine amg_cheby_init
121
122
126 subroutine amg_cheby_power(this, amg, n)
127 class(amg_cheby_t), intent(inout) :: this
128 type(tamg_hierarchy_t), intent(inout) :: amg
129 integer, intent(in) :: n
130 real(kind=rp) :: lam, b, a, rn
131 real(kind=rp), parameter :: boost = 1.1_rp
132 real(kind=rp), parameter :: lam_factor = 30.0_rp
133 real(kind=rp) :: wtw, dtw, dtd
134 integer :: i
135 associate(w => this%w, d => this%d, coef => amg%coef, gs_h => amg%gs_h, &
136 msh => amg%msh, xh => amg%Xh, blst => amg%blst)
137
138 do i = 1, n
139 !call random_number(rn)
140 !d(i) = rn + 10.0_rp
141 d(i) = sin(real(i))
142 end do
143 if (this%lvl .eq. 0) then
144 call gs_h%op(d, n, gs_op_add)!TODO
145 call blst%apply(d, n)
146 end if
147 !Power method to get lamba max
148 do i = 1, this%power_its
149 call amg%matvec(w, d, this%lvl)
150
151 if (this%lvl .eq. 0) then
152 wtw = glsc3(w, coef%mult, w, n)
153 else
154 wtw = glsc2(w, w, n)
155 end if
156
157 call cmult2(d, w, 1.0_rp/sqrt(wtw), n)
158 end do
159
160 call amg%matvec(w, d, this%lvl)
161
162 if (this%lvl .eq. 0) then
163 dtw = glsc3(d, coef%mult, w, n)
164 dtd = glsc3(d, coef%mult, d, n)
165 else
166 dtw = glsc2(d, w, n)
167 dtd = glsc2(d, d, n)
168 end if
169 lam = dtw / dtd
170 b = lam * boost
171 a = lam / lam_factor
172 this%tha = (b+a)/2.0_rp
173 this%dlt = (b-a)/2.0_rp
174
175 this%recompute_eigs = .false.
176 call amg_cheby_monitor(this%lvl, lam)
177 end associate
178 end subroutine amg_cheby_power
179
186 subroutine amg_cheby_solve(this, x, f, n, amg, zero_init)
187 class(amg_cheby_t), intent(inout) :: this
188 integer, intent(in) :: n
189 real(kind=rp), dimension(n), intent(inout) :: x
190 real(kind=rp), dimension(n), intent(inout) :: f
191 class(tamg_hierarchy_t), intent(inout) :: amg
192 type(ksp_monitor_t) :: ksp_results
193 logical, optional, intent(in) :: zero_init
194 integer :: iter, max_iter, i
195 real(kind=rp) :: rtr, rnorm
196 real(kind=rp) :: rhok, rhokp1, s1, thet, delt, tmp1, tmp2
197 logical :: zero_initial_guess
198
199 if (this%recompute_eigs) then
200 call this%comp_eig(amg, n)
201 end if
202 if (present(zero_init)) then
203 zero_initial_guess = zero_init
204 else
205 zero_initial_guess = .false.
206 end if
207 max_iter = this%max_iter
208
209 associate( w => this%w, r => this%r, d => this%d, blst => amg%blst)
210 call copy(r, f, n)
211 if (.not. zero_initial_guess) then
212 call amg%matvec(w, x, this%lvl)
213 call sub2(r, w, n)
214 end if
215
216 thet = this%tha
217 delt = this%dlt
218 s1 = thet / delt
219 rhok = 1.0_rp / s1
220
221 ! First iteration
222 do concurrent(i = 1:n)
223 d(i) = 1.0_rp/thet * r(i)
224 x(i) = x(i) + d(i)
225 end do
226
227 ! Rest of iterations
228 do iter = 2, max_iter
229 call amg%matvec(w, d, this%lvl)
230
231 rhokp1 = 1.0_rp / (2.0_rp * s1 - rhok)
232 tmp1 = rhokp1 * rhok
233 tmp2 = 2.0_rp * rhokp1 / delt
234 rhok = rhokp1
235
236 do concurrent(i = 1:n)
237 r(i) = r(i) - w(i)
238 d(i) = tmp1 * d(i) + tmp2 * r(i)
239 x(i) = x(i) + d(i)
240 end do
241
242 end do
243 end associate
244 end subroutine amg_cheby_solve
245
249 subroutine amg_device_cheby_power(this, amg, n)
250 class(amg_cheby_t), intent(inout) :: this
251 type(tamg_hierarchy_t), intent(inout) :: amg
252 integer, intent(in) :: n
253 real(kind=rp) :: lam, b, a, rn
254 real(kind=rp), parameter :: boost = 1.1_rp
255 real(kind=rp), parameter :: lam_factor = 30.0_rp
256 real(kind=rp) :: wtw, dtw, dtd
257 integer :: i
258 associate(w => this%w, d => this%d, coef => amg%coef, gs_h => amg%gs_h, &
259 msh => amg%msh, xh => amg%Xh, blst => amg%blst)
260 do i = 1, n
261 !TODO: replace with a better way to initialize power method
262 d(i) = sin(real(i))
263 end do
264 call device_memcpy(this%d, this%d_d, n, host_to_device, .true.)
265 if (this%lvl .eq. 0) then
266 call gs_h%op(d, n, gs_op_add)!TODO
267 call blst%apply(d, n)
268 end if
269 do i = 1, this%power_its
270 call amg%device_matvec(w, d, this%w_d, this%d_d, this%lvl)
271
272 if (this%lvl .eq. 0) then
273 wtw = device_glsc3(this%w_d, coef%mult_d, this%w_d, n)
274 else
275 wtw = device_glsc2(this%w_d, this%w_d, n)
276 end if
277
278 call device_cmult2(this%d_d, this%w_d, 1.0_rp/sqrt(wtw), n)
279 end do
280
281 call amg%device_matvec(w, d, this%w_d, this%d_d, this%lvl)
282
283 if (this%lvl .eq. 0) then
284 dtw = device_glsc3(this%d_d, coef%mult_d, this%w_d, n)
285 dtd = device_glsc3(this%d_d, coef%mult_d, this%d_d, n)
286 else
287 dtw = device_glsc2(this%d_d, this%w_d, n)
288 dtd = device_glsc2(this%d_d, this%d_d, n)
289 end if
290 lam = dtw / dtd
291 b = lam * boost
292 a = lam / lam_factor
293 this%tha = (b+a)/2.0_rp
294 this%dlt = (b-a)/2.0_rp
295
296 this%recompute_eigs = .false.
297 call amg_cheby_monitor(this%lvl, lam)
298 end associate
299 end subroutine amg_device_cheby_power
300
307 subroutine amg_device_cheby_solve(this, x, f, x_d, f_d, n, amg, zero_init)
308 class(amg_cheby_t), intent(inout) :: this
309 integer, intent(in) :: n
310 real(kind=rp), dimension(n), intent(inout) :: x
311 real(kind=rp), dimension(n), intent(inout) :: f
312 type(c_ptr) :: x_d
313 type(c_ptr) :: f_d
314 class(tamg_hierarchy_t), intent(inout) :: amg
315 type(ksp_monitor_t) :: ksp_results
316 logical, optional, intent(in) :: zero_init
317 integer :: iter, max_iter
318 real(kind=rp) :: rtr, rnorm
319 real(kind=rp) :: rhok, rhokp1, s1, thet, delt, tmp1, tmp2
320 logical :: zero_initial_guess
321
322 if (this%recompute_eigs) then
323 call this%device_comp_eig(amg, n)
324 end if
325 if (present(zero_init)) then
326 zero_initial_guess = zero_init
327 else
328 zero_initial_guess = .false.
329 end if
330 max_iter = this%max_iter
331
332 associate( w_d => this%w_d, r_d => this%r_d, d_d => this%d_d, &
333 blst => amg%blst)
334
335 if (.not. zero_initial_guess) then
336 call amg%device_matvec(this%w, x, w_d, x_d, this%lvl)
337 end if
338
339 thet = this%tha
340 delt = this%dlt
341 s1 = thet / delt
342 rhok = 1.0_rp / s1
343
344 ! First iteration
345 tmp1 = 1.0_rp / thet
346 call amg_device_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, &
347 tmp1, n, zero_initial_guess)
348 ! Rest of iterations
349 do iter = 2, max_iter
350 call amg%device_matvec(this%w, this%d, w_d, d_d, this%lvl)
351
352 rhokp1 = 1.0_rp / (2.0_rp * s1 - rhok)
353 tmp1 = rhokp1 * rhok
354 tmp2 = 2.0_rp * rhokp1 / delt
355 rhok = rhokp1
356
357 call amg_device_cheby_solve_part2(r_d, w_d, d_d, x_d, tmp1, tmp2, n)
358
359 end do
360 end associate
361 end subroutine amg_device_cheby_solve
362
367 subroutine amg_jacobi_init(this, n, lvl, max_iter)
368 class(amg_jacobi_t), intent(inout), target :: this
369 integer, intent(in) :: n
370 integer, intent(in) :: lvl
371 integer, intent(in) :: max_iter
372
373 allocate(this%d(n))
374 allocate(this%w(n))
375 allocate(this%r(n))
376 this%n = n
377 this%lvl = lvl
378 this%max_iter = max_iter
379 this%omega = 0.7_rp
380
381 end subroutine amg_jacobi_init
382
386 subroutine amg_jacobi_diag(this, amg, n)
387 class(amg_jacobi_t), intent(inout) :: this
388 type(tamg_hierarchy_t), intent(inout) :: amg
389 integer, intent(in) :: n
390 real(kind=rp) :: val
391 integer :: i
392 do i = 1, n
393 call tamg_sample_matrix_val(val, amg, this%lvl, i, i)
394 this%d(i) = 1.0_rp / val
395 end do
396 this%recompute_diag = .false.
397 end subroutine amg_jacobi_diag
398
404 subroutine amg_jacobi_solve(this, x, f, n, amg, niter)
405 class(amg_jacobi_t), intent(inout) :: this
406 integer, intent(in) :: n
407 real(kind=rp), dimension(n), intent(inout) :: x
408 real(kind=rp), dimension(n), intent(inout) :: f
409 class(tamg_hierarchy_t), intent(inout) :: amg
410 type(ksp_monitor_t) :: ksp_results
411 integer, optional, intent(in) :: niter
412 integer :: iter, max_iter
413 real(kind=rp) :: rtr, rnorm
414 integer :: i
415
416 if (this%recompute_diag) then
417 call this%comp_diag(amg, n)
418 end if
419
420 if (present(niter)) then
421 max_iter = niter
422 else
423 max_iter = this%max_iter
424 end if
425
426 ! x = x + omega * Dinv( f - Ax )
427 associate( w => this%w, r => this%r, d => this%d)
428 do iter = 1, max_iter
429 w = 0.0_rp
431 call amg%matvec(w, x, this%lvl)
433 call copy(r, f, n)
434 call sub2(r, w, n)
436 call col2(r, d, n)
438 call add2s2(x, r, this%omega, n)
439 end do
440 end associate
441 end subroutine amg_jacobi_solve
442
443 subroutine amg_smoo_monitor(lvl, smoo)
444 integer, intent(in) :: lvl
445 class(amg_cheby_t), intent(in) :: smoo
446 character(len=LOG_SIZE) :: log_buf
447
448 write(log_buf, '(A8,I2,A28)') '-- level', lvl, '-- init smoother: Chebyshev'
449 call neko_log%message(log_buf)
450 write(log_buf, '(A22,I6)') 'Iterations:', smoo%max_iter
451 call neko_log%message(log_buf)
452 end subroutine amg_smoo_monitor
453
454 subroutine amg_cheby_monitor(lvl, lam)
455 integer, intent(in) :: lvl
456 real(kind=rp), intent(in) :: lam
457 character(len=LOG_SIZE) :: log_buf
458
459 write(log_buf, '(A12,I2,A29,F12.3)') '-- AMG level', lvl, &
460 '-- Chebyshev approx. max eig', lam
461 call neko_log%message(log_buf)
462 end subroutine amg_cheby_monitor
463
464end module tree_amg_smoother
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
double real
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Defines a list of bc_t.
Definition bc_list.f90:34
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n, strm)
Returns .
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_sub2(a_d, b_d, n, strm)
Vector substraction .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
real(kind=rp) function, public device_glsc2(a_d, b_d, n, strm)
Weighted inner product .
subroutine, public device_cmult2(a_d, b_d, c, n, strm)
Multiplication by constant c .
Implements device kernels for use with TreeAMG smoothers.
subroutine, public amg_device_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, inv_thet, n, zero_initial)
subroutine, public amg_device_cheby_solve_part2(r_d, w_d, d_d, x_d, tmp1, tmp2, n)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
Logging routines.
Definition log.f90:34
type(log_t), public neko_log
Global log stream.
Definition log.f90:70
integer, parameter, public log_size
Definition log.f90:46
Definition math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition math.f90:411
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
Definition math.f90:423
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1067
real(kind=rp) function, public glsc2(a, b, n)
Weighted inner product .
Definition math.f90:1048
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:726
subroutine, public add3s2(a, b, c, c1, c2, n)
Returns .
Definition math.f90:895
subroutine, public col2(a, b, n)
Vector multiplication .
Definition math.f90:854
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:249
subroutine, public sub2(a, b, n)
Vector substraction .
Definition math.f90:768
subroutine, public add2s2(a, b, c1, n)
Vector addition with scalar multiplication (multiplication on second argument)
Definition math.f90:812
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Implements smoothers for use with TreeAMG matrix vector product.
subroutine amg_jacobi_init(this, n, lvl, max_iter)
Initialization of Jacobi (this is expensive...)
subroutine amg_cheby_power(this, amg, n)
Power method to approximate largest eigenvalue.
subroutine amg_device_cheby_power(this, amg, n)
Power method to approximate largest eigenvalue.
subroutine amg_cheby_solve(this, x, f, n, amg, zero_init)
Chebyshev smoother From Saad's iterative methods textbook.
subroutine amg_cheby_init(this, n, lvl, max_iter)
Initialization of chebyshev.
subroutine amg_jacobi_diag(this, amg, n)
SAMPLE MATRIX DIAGONAL VALUES (DO NOT USE, EXPENSIVE)
subroutine amg_device_cheby_solve(this, x, f, x_d, f_d, n, amg, zero_init)
Chebyshev smoother From Saad's iterative methods textbook.
subroutine amg_jacobi_solve(this, x, f, n, amg, niter)
Jacobi smoother.
subroutine amg_smoo_monitor(lvl, smoo)
subroutine amg_cheby_monitor(lvl, lam)
Implements utilities for the TreeAMG hierarchy structure.
subroutine, public tamg_sample_matrix_val(val, amg, lvl, i, j)
Sample the values in a matix (expensive, use with caution)
Implements the base type for TreeAMG hierarchy structure.
Definition tree_amg.f90:34
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Type for a TreeAMG hierarchy.
Definition tree_amg.f90:94
Type for Chebyshev iteration using TreeAMG matvec.
Type for Chebyshev iteration using TreeAMG matvec.