Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
tree_amg_smoother.f90
Go to the documentation of this file.
1! Copyright (c) 2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use tree_amg, only : tamg_hierarchy_t
37 use num_types, only : rp
38 use math, only : col2, add2, add2s2, glsc2, glsc3, sub2, cmult, &
43 use krylov, only : ksp_monitor_t
44 use bc_list, only: bc_list_t
45 use gather_scatter, only : gs_t, gs_op_add
46 use logger, only : neko_log, log_size
49 use, intrinsic :: iso_c_binding
50 implicit none
51 private
52
54 type, public :: amg_jacobi_t
55 real(kind=rp), allocatable :: d(:)
56 real(kind=rp), allocatable :: w(:)
57 real(kind=rp), allocatable :: r(:)
58 real(kind=rp) :: omega
59 integer :: lvl
60 integer :: n
61 integer :: max_iter = 10
62 logical :: recompute_diag = .true.
63 contains
64 procedure, pass(this) :: init => amg_jacobi_init
65 procedure, pass(this) :: solve => amg_jacobi_solve
66 procedure, pass(this) :: comp_diag => amg_jacobi_diag
67 end type amg_jacobi_t
68
70 type, public :: amg_cheby_t
71 real(kind=rp), allocatable :: d(:)
72 type(c_ptr) :: d_d = c_null_ptr
73 real(kind=rp), allocatable :: w(:)
74 type(c_ptr) :: w_d = c_null_ptr
75 real(kind=rp), allocatable :: r(:)
76 type(c_ptr) :: r_d = c_null_ptr
77 real(kind=rp) :: tha, dlt
78 integer :: lvl
79 integer :: n
80 integer :: power_its = 250
81 integer :: max_iter = 10
82 logical :: recompute_eigs = .true.
83 contains
84 procedure, pass(this) :: init => amg_cheby_init
85 procedure, pass(this) :: solve => amg_cheby_solve
86 procedure, pass(this) :: comp_eig => amg_cheby_power
87 procedure, pass(this) :: device_solve => amg_device_cheby_solve
88 procedure, pass(this) :: device_comp_eig => amg_device_cheby_power
89 end type amg_cheby_t
90
91contains
92
97 subroutine amg_cheby_init(this, n, lvl, max_iter)
98 class(amg_cheby_t), intent(inout), target :: this
99 integer, intent(in) :: n
100 integer, intent(in) :: lvl
101 integer, intent(in) :: max_iter
102
103 allocate(this%d(n))
104 allocate(this%w(n))
105 allocate(this%r(n))
106 if (neko_bcknd_device .eq. 1) then
107 call device_map(this%d, this%d_d, n)
108 call device_map(this%w, this%w_d, n)
109 call device_map(this%r, this%r_d, n)
110 end if
111 this%n = n
112 this%lvl = lvl
113 this%max_iter = max_iter
114 this%recompute_eigs = .true.
115
116 call amg_smoo_monitor(lvl, this)
117
118 end subroutine amg_cheby_init
119
120
124 subroutine amg_cheby_power(this, amg, n)
125 class(amg_cheby_t), intent(inout) :: this
126 type(tamg_hierarchy_t), intent(inout) :: amg
127 integer, intent(in) :: n
128 real(kind=rp) :: lam, b, a, rn
129 real(kind=rp), parameter :: boost = 1.1_rp
130 real(kind=rp), parameter :: lam_factor = 30.0_rp
131 real(kind=rp) :: wtw, dtw, dtd
132 integer :: i
133 associate(w => this%w, d => this%d, coef => amg%coef, gs_h => amg%gs_h, &
134 msh => amg%msh, xh => amg%Xh, blst => amg%blst)
135
136 do i = 1, n
137 !call random_number(rn)
138 !d(i) = rn + 10.0_rp
139 d(i) = sin(real(i))
140 end do
141 if (this%lvl .eq. 0) then
142 call gs_h%op(d, n, gs_op_add)!TODO
143 call blst%apply(d, n)
144 end if
145 !Power method to get lamba max
146 do i = 1, this%power_its
147 call amg%matvec(w, d, this%lvl)
148
149 if (this%lvl .eq. 0) then
150 wtw = glsc3(w, coef%mult, w, n)
151 else
152 wtw = glsc2(w, w, n)
153 end if
154
155 call cmult2(d, w, 1.0_rp/sqrt(wtw), n)
156 end do
157
158 call amg%matvec(w, d, this%lvl)
159
160 if (this%lvl .eq. 0) then
161 dtw = glsc3(d, coef%mult, w, n)
162 dtd = glsc3(d, coef%mult, d, n)
163 else
164 dtw = glsc2(d, w, n)
165 dtd = glsc2(d, d, n)
166 end if
167 lam = dtw / dtd
168 b = lam * boost
169 a = lam / lam_factor
170 this%tha = (b+a)/2.0_rp
171 this%dlt = (b-a)/2.0_rp
172
173 this%recompute_eigs = .false.
174 call amg_cheby_monitor(this%lvl, lam)
175 end associate
176 end subroutine amg_cheby_power
177
184 subroutine amg_cheby_solve(this, x, f, n, amg, zero_init)
185 class(amg_cheby_t), intent(inout) :: this
186 integer, intent(in) :: n
187 real(kind=rp), dimension(n), intent(inout) :: x
188 real(kind=rp), dimension(n), intent(inout) :: f
189 class(tamg_hierarchy_t), intent(inout) :: amg
190 type(ksp_monitor_t) :: ksp_results
191 logical, optional, intent(in) :: zero_init
192 integer :: iter, max_iter
193 real(kind=rp) :: rtr, rnorm
194 real(kind=rp) :: rhok, rhokp1, s1, thet, delt, tmp1, tmp2
195 logical :: zero_initial_guess
196
197 if (this%recompute_eigs) then
198 call this%comp_eig(amg, n)
199 end if
200 if (present(zero_init)) then
201 zero_initial_guess = zero_init
202 else
203 zero_initial_guess = .false.
204 end if
205 max_iter = this%max_iter
206
207 associate( w => this%w, r => this%r, d => this%d, blst => amg%blst)
208 call copy(r, f, n)
209 if (.not. zero_initial_guess) then
210 call amg%matvec(w, x, this%lvl)
211 call sub2(r, w, n)
212 end if
213
214 thet = this%tha
215 delt = this%dlt
216 s1 = thet / delt
217 rhok = 1.0_rp / s1
218
219 ! First iteration
220 call cmult2(d, r, 1.0_rp/thet, n)
221 call add2(x, d, n)
222
223 ! Rest of iterations
224 do iter = 2, max_iter
225 call amg%matvec(w, d, this%lvl)
226 call sub2(r, w, n)
227
228 rhokp1 = 1.0_rp / (2.0_rp * s1 - rhok)
229 tmp1 = rhokp1 * rhok
230 tmp2 = 2.0_rp * rhokp1 / delt
231 rhok = rhokp1
232
233 call add3s2(d, d, r, tmp1, tmp2, n)
234 call add2(x, d, n)
235 end do
236 end associate
237 end subroutine amg_cheby_solve
238
242 subroutine amg_device_cheby_power(this, amg, n)
243 class(amg_cheby_t), intent(inout) :: this
244 type(tamg_hierarchy_t), intent(inout) :: amg
245 integer, intent(in) :: n
246 real(kind=rp) :: lam, b, a, rn
247 real(kind=rp), parameter :: boost = 1.1_rp
248 real(kind=rp), parameter :: lam_factor = 30.0_rp
249 real(kind=rp) :: wtw, dtw, dtd
250 integer :: i
251 associate(w => this%w, d => this%d, coef => amg%coef, gs_h => amg%gs_h, &
252 msh => amg%msh, xh => amg%Xh, blst => amg%blst)
253 do i = 1, n
254 !TODO: replace with a better way to initialize power method
255 d(i) = sin(real(i))
256 end do
257 call device_memcpy(this%d, this%d_d, n, host_to_device, .true.)
258 if (this%lvl .eq. 0) then
259 call gs_h%op(d, n, gs_op_add)!TODO
260 call blst%apply(d, n)
261 end if
262 do i = 1, this%power_its
263 call amg%device_matvec(w, d, this%w_d, this%d_d, this%lvl)
264
265 if (this%lvl .eq. 0) then
266 wtw = device_glsc3(this%w_d, coef%mult_d, this%w_d, n)
267 else
268 wtw = device_glsc2(this%w_d, this%w_d, n)
269 end if
270
271 call device_cmult2(this%d_d, this%w_d, 1.0_rp/sqrt(wtw), n)
272 end do
273
274 call amg%device_matvec(w, d, this%w_d, this%d_d, this%lvl)
275
276 if (this%lvl .eq. 0) then
277 dtw = device_glsc3(this%d_d, coef%mult_d, this%w_d, n)
278 dtd = device_glsc3(this%d_d, coef%mult_d, this%d_d, n)
279 else
280 dtw = device_glsc2(this%d_d, this%w_d, n)
281 dtd = device_glsc2(this%d_d, this%d_d, n)
282 end if
283 lam = dtw / dtd
284 b = lam * boost
285 a = lam / lam_factor
286 this%tha = (b+a)/2.0_rp
287 this%dlt = (b-a)/2.0_rp
288
289 this%recompute_eigs = .false.
290 call amg_cheby_monitor(this%lvl, lam)
291 end associate
292 end subroutine amg_device_cheby_power
293
300 subroutine amg_device_cheby_solve(this, x, f, x_d, f_d, n, amg, zero_init)
301 class(amg_cheby_t), intent(inout) :: this
302 integer, intent(in) :: n
303 real(kind=rp), dimension(n), intent(inout) :: x
304 real(kind=rp), dimension(n), intent(inout) :: f
305 type(c_ptr) :: x_d
306 type(c_ptr) :: f_d
307 class(tamg_hierarchy_t), intent(inout) :: amg
308 type(ksp_monitor_t) :: ksp_results
309 logical, optional, intent(in) :: zero_init
310 integer :: iter, max_iter
311 real(kind=rp) :: rtr, rnorm
312 real(kind=rp) :: rhok, rhokp1, s1, thet, delt, tmp1, tmp2
313 logical :: zero_initial_guess
314
315 if (this%recompute_eigs) then
316 call this%device_comp_eig(amg, n)
317 end if
318 if (present(zero_init)) then
319 zero_initial_guess = zero_init
320 else
321 zero_initial_guess = .false.
322 end if
323 max_iter = this%max_iter
324
325 associate( w_d => this%w_d, r_d => this%r_d, d_d => this%d_d, &
326 blst => amg%blst)
327 call device_copy(r_d, f_d, n)
328 if (.not. zero_initial_guess) then
329 call amg%device_matvec(this%w, x, w_d, x_d, this%lvl)
330 call device_sub2(r_d, w_d, n)
331 end if
332
333 thet = this%tha
334 delt = this%dlt
335 s1 = thet / delt
336 rhok = 1.0_rp / s1
337
338 ! First iteration
339 call device_cmult2(d_d, r_d, 1.0_rp/thet, n)
340 call device_add2(x_d, d_d, n)
341 ! Rest of iterations
342 do iter = 2, max_iter
343 call amg%device_matvec(this%w, this%d, w_d, d_d, this%lvl)
344 call device_sub2(r_d, w_d, n)
345
346 rhokp1 = 1.0_rp / (2.0_rp * s1 - rhok)
347 tmp1 = rhokp1 * rhok
348 tmp2 = 2.0_rp * rhokp1 / delt
349 rhok = rhokp1
350
351 call device_add3s2(d_d, d_d, r_d, tmp1, tmp2, n)
352 call device_add2(x_d, d_d, n)
353 end do
354 end associate
355 end subroutine amg_device_cheby_solve
356
361 subroutine amg_jacobi_init(this, n, lvl, max_iter)
362 class(amg_jacobi_t), intent(inout), target :: this
363 integer, intent(in) :: n
364 integer, intent(in) :: lvl
365 integer, intent(in) :: max_iter
366
367 allocate(this%d(n))
368 allocate(this%w(n))
369 allocate(this%r(n))
370 this%n = n
371 this%lvl = lvl
372 this%max_iter = max_iter
373 this%omega = 0.7_rp
374
375 end subroutine amg_jacobi_init
376
380 subroutine amg_jacobi_diag(this, amg, n)
381 class(amg_jacobi_t), intent(inout) :: this
382 type(tamg_hierarchy_t), intent(inout) :: amg
383 integer, intent(in) :: n
384 real(kind=rp) :: val
385 integer :: i
386 do i = 1, n
387 call tamg_sample_matrix_val(val, amg, this%lvl, i, i)
388 this%d(i) = 1.0_rp / val
389 end do
390 this%recompute_diag = .false.
391 end subroutine amg_jacobi_diag
392
398 subroutine amg_jacobi_solve(this, x, f, n, amg, niter)
399 class(amg_jacobi_t), intent(inout) :: this
400 integer, intent(in) :: n
401 real(kind=rp), dimension(n), intent(inout) :: x
402 real(kind=rp), dimension(n), intent(inout) :: f
403 class(tamg_hierarchy_t), intent(inout) :: amg
404 type(ksp_monitor_t) :: ksp_results
405 integer, optional, intent(in) :: niter
406 integer :: iter, max_iter
407 real(kind=rp) :: rtr, rnorm
408 integer :: i
409
410 if (this%recompute_diag) then
411 call this%comp_diag(amg, n)
412 end if
413
414 if (present(niter)) then
415 max_iter = niter
416 else
417 max_iter = this%max_iter
418 end if
419
420 ! x = x + omega * Dinv( f - Ax )
421 associate( w => this%w, r => this%r, d => this%d)
422 do iter = 1, max_iter
423 w = 0.0_rp
425 call amg%matvec(w, x, this%lvl)
427 call copy(r, f, n)
428 call sub2(r, w, n)
430 call col2(r, d, n)
432 call add2s2(x, r, this%omega, n)
433 end do
434 end associate
435 end subroutine amg_jacobi_solve
436
437 subroutine amg_smoo_monitor(lvl, smoo)
438 integer, intent(in) :: lvl
439 class(amg_cheby_t), intent(in) :: smoo
440 character(len=LOG_SIZE) :: log_buf
441
442 write(log_buf, '(A8,I2,A28)') '-- level', lvl, '-- init smoother: Chebyshev'
443 call neko_log%message(log_buf)
444 write(log_buf, '(A22,I6)') 'Iterations:', smoo%max_iter
445 call neko_log%message(log_buf)
446 end subroutine amg_smoo_monitor
447
448 subroutine amg_cheby_monitor(lvl, lam)
449 integer, intent(in) :: lvl
450 real(kind=rp), intent(in) :: lam
451 character(len=LOG_SIZE) :: log_buf
452
453 write(log_buf, '(A12,I2,A29,F12.3)') '-- AMG level', lvl, &
454 '-- Chebyshev approx. max eig', lam
455 call neko_log%message(log_buf)
456 end subroutine amg_cheby_monitor
457
458end module tree_amg_smoother
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
double real
Map a Fortran array to a device (allocate and associate)
Definition device.F90:72
Copy data between host and device (or device and device)
Definition device.F90:66
Defines a list of bc_t.
Definition bc_list.f90:34
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n, strm)
Returns .
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_sub2(a_d, b_d, n, strm)
Vector substraction .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
real(kind=rp) function, public device_glsc2(a_d, b_d, n, strm)
Weighted inner product .
subroutine, public device_cmult2(a_d, b_d, c, n, strm)
Multiplication by constant c .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:214
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
Logging routines.
Definition log.f90:34
type(log_t), public neko_log
Global log stream.
Definition log.f90:70
integer, parameter, public log_size
Definition log.f90:46
Definition math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition math.f90:417
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
Definition math.f90:429
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1026
real(kind=rp) function, public glsc2(a, b, n)
Weighted inner product .
Definition math.f90:1007
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:732
subroutine, public add3s2(a, b, c, c1, c2, n)
Returns .
Definition math.f90:901
subroutine, public col2(a, b, n)
Vector multiplication .
Definition math.f90:860
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:255
subroutine, public sub2(a, b, n)
Vector substraction .
Definition math.f90:774
subroutine, public add2s2(a, b, c1, n)
Vector addition with scalar multiplication (multiplication on second argument)
Definition math.f90:818
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Implements smoothers for use with TreeAMG matrix vector product.
subroutine amg_jacobi_init(this, n, lvl, max_iter)
Initialization of Jacobi (this is expensive...)
subroutine amg_cheby_power(this, amg, n)
Power method to approximate largest eigenvalue.
subroutine amg_device_cheby_power(this, amg, n)
Power method to approximate largest eigenvalue.
subroutine amg_cheby_solve(this, x, f, n, amg, zero_init)
Chebyshev smoother From Saad's iterative methods textbook.
subroutine amg_cheby_init(this, n, lvl, max_iter)
Initialization of chebyshev.
subroutine amg_jacobi_diag(this, amg, n)
SAMPLE MATRIX DIAGONAL VALUES (DO NOT USE, EXPENSIVE)
subroutine amg_device_cheby_solve(this, x, f, x_d, f_d, n, amg, zero_init)
Chebyshev smoother From Saad's iterative methods textbook.
subroutine amg_jacobi_solve(this, x, f, n, amg, niter)
Jacobi smoother.
subroutine amg_smoo_monitor(lvl, smoo)
subroutine amg_cheby_monitor(lvl, lam)
Implements utilities for the TreeAMG hierarchy structure.
subroutine, public tamg_sample_matrix_val(val, amg, lvl, i, j)
Sample the values in a matix (expensive, use with caution)
Implements the base type for TreeAMG hierarchy structure.
Definition tree_amg.f90:34
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Type for a TreeAMG hierarchy.
Definition tree_amg.f90:94
Type for Chebyshev iteration using TreeAMG matvec.
Type for Chebyshev iteration using TreeAMG matvec.