Loading [MathJax]/extensions/tex2jax.js
Neko 0.9.99
A portable framework for high-order spectral element flow simulations
All Classes Namespaces Files Functions Variables Typedefs Enumerator Macros Pages
tree_amg_smoother.f90
Go to the documentation of this file.
1! Copyright (c) 2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use tree_amg, only : tamg_hierarchy_t
37 use num_types, only : rp
38 use math, only : col2, add2, add2s2, glsc2, glsc3, sub2, cmult, &
43 use krylov, only : ksp_monitor_t
44 use bc_list, only: bc_list_t
45 use gather_scatter, only : gs_t, gs_op_add
46 use logger, only : neko_log, log_size
49 use, intrinsic :: iso_c_binding
50 implicit none
51 private
52
54 type, public :: amg_jacobi_t
55 real(kind=rp), allocatable :: d(:)
56 real(kind=rp), allocatable :: w(:)
57 real(kind=rp), allocatable :: r(:)
58 real(kind=rp) :: omega
59 integer :: lvl
60 integer :: n
61 integer :: max_iter = 10
62 logical :: recompute_diag = .true.
63 contains
64 procedure, pass(this) :: init => amg_jacobi_init
65 procedure, pass(this) :: solve => amg_jacobi_solve
66 procedure, pass(this) :: comp_diag => amg_jacobi_diag
67 end type amg_jacobi_t
68
70 type, public :: amg_cheby_t
71 real(kind=rp), allocatable :: d(:)
72 type(c_ptr) :: d_d = c_null_ptr
73 real(kind=rp), allocatable :: w(:)
74 type(c_ptr) :: w_d = c_null_ptr
75 real(kind=rp), allocatable :: r(:)
76 type(c_ptr) :: r_d = c_null_ptr
77 real(kind=rp) :: tha, dlt
78 integer :: lvl
79 integer :: n
80 integer :: power_its = 250
81 integer :: max_iter = 10
82 logical :: recompute_eigs = .true.
83 contains
84 procedure, pass(this) :: init => amg_cheby_init
85 procedure, pass(this) :: solve => amg_cheby_solve
86 procedure, pass(this) :: comp_eig => amg_cheby_power
87 procedure, pass(this) :: device_solve => amg_device_cheby_solve
88 procedure, pass(this) :: device_comp_eig => amg_device_cheby_power
89 end type amg_cheby_t
90
91contains
92
97 subroutine amg_cheby_init(this, n, lvl, max_iter)
98 class(amg_cheby_t), intent(inout), target :: this
99 integer, intent(in) :: n
100 integer, intent(in) :: lvl
101 integer, intent(in) :: max_iter
102
103 allocate(this%d(n))
104 allocate(this%w(n))
105 allocate(this%r(n))
106 if (neko_bcknd_device .eq. 1) then
107 call device_map(this%d, this%d_d, n)
108 call device_map(this%w, this%w_d, n)
109 call device_map(this%r, this%r_d, n)
110 end if
111 this%n = n
112 this%lvl = lvl
113 this%max_iter = max_iter
114 this%recompute_eigs = .true.
115
116 call amg_smoo_monitor(lvl,this)
117
118 end subroutine amg_cheby_init
119
120
124 subroutine amg_cheby_power(this, amg, n)
125 class(amg_cheby_t), intent(inout) :: this
126 type(tamg_hierarchy_t), intent(inout) :: amg
127 integer, intent(in) :: n
128 real(kind=rp) :: lam, b, a, rn
129 real(kind=rp), parameter :: boost = 1.1_rp
130 real(kind=rp), parameter :: lam_factor = 30.0_rp
131 real(kind=rp) :: wtw, dtw, dtd
132 integer :: i
133 associate(w => this%w, d => this%d, coef => amg%coef, gs_h => amg%gs_h, &
134 msh=>amg%msh, xh=>amg%Xh, blst=>amg%blst)
135
136 do i = 1, n
137 !TODO: replace with a better way to initialize power method
138 !call random_number(rn)
139 !d(i) = rn + 10.0_rp
140 d(i) = sin(real(i))
141 end do
142 if (this%lvl .eq. 0) then
143 call gs_h%op(d, n, gs_op_add)!TODO
144 call blst%apply(d, n)
145 end if
146
147 !Power method to get lamba max
148 do i = 1, this%power_its
149 w = 0d0
150 call amg%matvec(w, d, this%lvl)
151
152 if (this%lvl .eq. 0) then
153 !call blst%apply(w, n)
154 wtw = glsc3(w, coef%mult, w, n)
155 else
156 wtw = glsc2(w, w, n)
157 end if
158
159 call cmult2(d, w, 1.0_rp/sqrt(wtw), n)
160
161 if (this%lvl .eq. 0) then
162 !call blst%apply(d, n)
163 end if
164 end do
165
166 w = 0d0
167 call amg%matvec(w, d, this%lvl)
168 if (this%lvl .eq. 0) then
169 !call blst%apply(w, n)
170 end if
171
172 if (this%lvl .eq. 0) then
173 dtw = glsc3(d, coef%mult, w, n)
174 dtd = glsc3(d, coef%mult, d, n)
175 else
176 dtw = glsc2(d, w, n)
177 dtd = glsc2(d, d, n)
178 end if
179 lam = dtw / dtd
180 b = lam * boost
181 a = lam / lam_factor
182 this%tha = (b+a)/2.0_rp
183 this%dlt = (b-a)/2.0_rp
184
185 this%recompute_eigs = .false.
186
187 call amg_cheby_monitor(this%lvl,lam)
188 end associate
189 end subroutine amg_cheby_power
190
197 subroutine amg_cheby_solve(this, x, f, n, amg, niter)
198 class(amg_cheby_t), intent(inout) :: this
199 integer, intent(in) :: n
200 real(kind=rp), dimension(n), intent(inout) :: x
201 real(kind=rp), dimension(n), intent(inout) :: f
202 class(tamg_hierarchy_t), intent(inout) :: amg
203 type(ksp_monitor_t) :: ksp_results
204 integer, optional, intent(in) :: niter
205 integer :: iter, max_iter
206 real(kind=rp) :: rtr, rnorm
207 real(kind=rp) :: rhok, rhokp1, s1, thet, delt, tmp1, tmp2
208
209 if (this%recompute_eigs) then
210 call this%comp_eig(amg, n)
211 end if
212
213 if (present(niter)) then
214 max_iter = niter
215 else
216 max_iter = this%max_iter
217 end if
218
219 associate( w => this%w, r => this%r, d => this%d, blst=>amg%blst)
220 call copy(r, f, n)
221 w = 0d0
222 call amg%matvec(w, x, this%lvl)
223 call sub2(r, w, n)
224
225 thet = this%tha
226 delt = this%dlt
227 s1 = thet / delt
228 rhok = 1.0_rp / s1
229
230 call copy(d, r, n)
231 call cmult(d, 1.0_rp/thet, n)
232
233 do iter = 1, max_iter
234 call add2(x,d,n)
235
236 w = 0d0
237 call amg%matvec(w, d, this%lvl)
238 call sub2(r, w, n)
239
240 rhokp1 = 1.0_rp / (2.0_rp * s1 - rhok)
241 tmp1 = rhokp1 * rhok
242 tmp2 = 2.0_rp * rhokp1 / delt
243 rhok = rhokp1
244
245 call cmult(d, tmp1, n)
246 call add2s2(d, r, tmp2, n)
247 end do
248
249 end associate
250 end subroutine amg_cheby_solve
251
255 subroutine amg_device_cheby_power(this, amg, n)
256 class(amg_cheby_t), intent(inout) :: this
257 type(tamg_hierarchy_t), intent(inout) :: amg
258 integer, intent(in) :: n
259 real(kind=rp) :: lam, b, a, rn
260 real(kind=rp), parameter :: boost = 1.1_rp
261 real(kind=rp), parameter :: lam_factor = 30.0_rp
262 real(kind=rp) :: wtw, dtw, dtd
263 integer :: i
264 associate(w => this%w, d => this%d, coef => amg%coef, gs_h => amg%gs_h, &
265 msh=>amg%msh, xh=>amg%Xh, blst=>amg%blst)
266 do i = 1, n
267 !TODO: replace with a better way to initialize power method
268 d(i) = sin(real(i))
269 end do
270 call device_memcpy(this%d, this%d_d, n, host_to_device, .true.)
271 if (this%lvl .eq. 0) then
272 call gs_h%op(d, n, gs_op_add)!TODO
273 call blst%apply(d, n)
274 end if
275 do i = 1, this%power_its
276 call device_rzero(this%w_d, n)
277 call amg%device_matvec(w, d, this%w_d, this%d_d, this%lvl)
278 if (this%lvl .eq. 0) then
279 wtw = device_glsc3(this%w_d, coef%mult_d, this%w_d, n)
280 else
281 wtw = device_glsc2(this%w_d, this%w_d, n)
282 end if
283 call device_cmult2(this%d_d, this%w_d, 1.0_rp/sqrt(wtw), n)
284 end do
285 call device_rzero(this%w_d, n)
286 call amg%device_matvec(w, d, this%w_d, this%d_d, this%lvl)
287 if (this%lvl .eq. 0) then
288 dtw = device_glsc3(this%d_d, coef%mult_d, this%w_d, n)
289 dtd = device_glsc3(this%d_d, coef%mult_d, this%d_d, n)
290 else
291 dtw = device_glsc2(this%d_d, this%w_d, n)
292 dtd = device_glsc2(this%d_d, this%d_d, n)
293 end if
294 lam = dtw / dtd
295 b = lam * boost
296 a = lam / lam_factor
297 this%tha = (b+a)/2.0_rp
298 this%dlt = (b-a)/2.0_rp
299 this%recompute_eigs = .false.
300 call amg_cheby_monitor(this%lvl,lam)
301 end associate
302 end subroutine amg_device_cheby_power
303
310 subroutine amg_device_cheby_solve(this, x, f, x_d, f_d, n, amg, niter)
311 class(amg_cheby_t), intent(inout) :: this
312 integer, intent(in) :: n
313 real(kind=rp), dimension(n), intent(inout) :: x
314 real(kind=rp), dimension(n), intent(inout) :: f
315 type(c_ptr) :: x_d
316 type(c_ptr) :: f_d
317 class(tamg_hierarchy_t), intent(inout) :: amg
318 type(ksp_monitor_t) :: ksp_results
319 integer, optional, intent(in) :: niter
320 integer :: iter, max_iter
321 real(kind=rp) :: rtr, rnorm
322 real(kind=rp) :: rhok, rhokp1, s1, thet, delt, tmp1, tmp2
323 if (this%recompute_eigs) then
324 call this%device_comp_eig(amg, n)
325 end if
326 if (present(niter)) then
327 max_iter = niter
328 else
329 max_iter = this%max_iter
330 end if
331 associate( w_d => this%w_d, r_d => this%r_d, d_d => this%d_d, blst=>amg%blst)
332 call device_copy(r_d, f_d, n)
333 call amg%device_matvec(this%w, x, w_d, x_d, this%lvl)
334 call device_sub2(r_d, w_d, n)
335 thet = this%tha
336 delt = this%dlt
337 s1 = thet / delt
338 rhok = 1.0_rp / s1
339 call device_cmult2(d_d, r_d, 1.0_rp/thet, n)
340 do iter = 1, max_iter
341 call device_add2(x_d,d_d,n)
342 call amg%device_matvec(this%w, this%d, w_d, d_d, this%lvl)
343 call device_sub2(r_d, w_d, n)
344 rhokp1 = 1.0_rp / (2.0_rp * s1 - rhok)
345 tmp1 = rhokp1 * rhok
346 tmp2 = 2.0_rp * rhokp1 / delt
347 rhok = rhokp1
348 call device_add3s2(d_d, d_d, r_d, tmp1, tmp2, n)
349 end do
350 end associate
351 end subroutine amg_device_cheby_solve
352
357 subroutine amg_jacobi_init(this, n, lvl, max_iter)
358 class(amg_jacobi_t), intent(inout), target :: this
359 integer, intent(in) :: n
360 integer, intent(in) :: lvl
361 integer, intent(in) :: max_iter
362
363 allocate(this%d(n))
364 allocate(this%w(n))
365 allocate(this%r(n))
366 this%n = n
367 this%lvl = lvl
368 this%max_iter = max_iter
369 this%omega = 0.7_rp
370
371 end subroutine amg_jacobi_init
372
376 subroutine amg_jacobi_diag(this, amg, n)
377 class(amg_jacobi_t), intent(inout) :: this
378 type(tamg_hierarchy_t), intent(inout) :: amg
379 integer, intent(in) :: n
380 real(kind=rp) :: val
381 integer :: i
382 do i = 1, n
383 call tamg_sample_matrix_val(val, amg, this%lvl, i, i)
384 this%d(i) = 1.0_rp / val
385 end do
386 this%recompute_diag = .false.
387 end subroutine amg_jacobi_diag
388
394 subroutine amg_jacobi_solve(this, x, f, n, amg, niter)
395 class(amg_jacobi_t), intent(inout) :: this
396 integer, intent(in) :: n
397 real(kind=rp), dimension(n), intent(inout) :: x
398 real(kind=rp), dimension(n), intent(inout) :: f
399 class(tamg_hierarchy_t), intent(inout) :: amg
400 type(ksp_monitor_t) :: ksp_results
401 integer, optional, intent(in) :: niter
402 integer :: iter, max_iter
403 real(kind=rp) :: rtr, rnorm
404 integer :: i
405
406 if (this%recompute_diag) then
407 call this%comp_diag(amg, n)
408 end if
409
410 if (present(niter)) then
411 max_iter = niter
412 else
413 max_iter = this%max_iter
414 end if
415
416 ! x = x + omega * Dinv( f - Ax )
417 associate( w => this%w, r => this%r, d => this%d)
418 do iter = 1, max_iter
419 w = 0.0_rp
421 call amg%matvec(w, x, this%lvl)
423 call copy(r, f, n)
424 call sub2(r, w, n)
425 !print *, iter, "RES", sqrt(glsc2(r,r,n))!>DEBUG
427 call col2(r, d, n)
429 call add2s2(x, r, this%omega, n)
430 end do
431 end associate
432 end subroutine amg_jacobi_solve
433
434 subroutine amg_smoo_monitor(lvl,smoo)
435 integer, intent(in) :: lvl
436 class(amg_cheby_t), intent(in) :: smoo
437 character(len=LOG_SIZE) :: log_buf
438
439 write(log_buf, '(A8,I2,A28)') '-- level',lvl,'-- init smoother: Chebyshev'
440 call neko_log%message(log_buf)
441 write(log_buf, '(A22,I6)') 'Iterations:',smoo%max_iter
442 call neko_log%message(log_buf)
443 end subroutine amg_smoo_monitor
444
445 subroutine amg_cheby_monitor(lvl,lam)
446 integer, intent(in) :: lvl
447 real(kind=rp), intent(in) :: lam
448 character(len=LOG_SIZE) :: log_buf
449
450 write(log_buf, '(A12,I2,A29,F12.3)') '-- AMG level',lvl,'-- Chebyshev approx. max eig', lam
451 call neko_log%message(log_buf)
452 end subroutine amg_cheby_monitor
453
454end module tree_amg_smoother
double real
Map a Fortran array to a device (allocate and associate)
Definition device.F90:71
Copy data between host and device (or device and device)
Definition device.F90:65
Defines a list of bc_t.
Definition bc_list.f90:34
subroutine, public device_add2(a_d, b_d, n)
Vector addition .
subroutine, public device_rzero(a_d, n)
Zero a real vector.
subroutine, public device_cmult2(a_d, b_d, c, n)
Multiplication by constant c .
real(kind=rp) function, public device_glsc2(a_d, b_d, n)
Weighted inner product .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n)
Returns .
subroutine, public device_sub2(a_d, b_d, n)
Vector substraction .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:46
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:200
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
Logging routines.
Definition log.f90:34
type(log_t), public neko_log
Global log stream.
Definition log.f90:65
integer, parameter, public log_size
Definition log.f90:42
Definition math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition math.f90:310
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
Definition math.f90:700
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:894
real(kind=rp) function, public glsc2(a, b, n)
Weighted inner product .
Definition math.f90:875
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:586
subroutine, public col2(a, b, n)
Vector multiplication .
Definition math.f90:728
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:238
subroutine, public sub2(a, b, n)
Vector substraction .
Definition math.f90:628
subroutine, public add2s2(a, b, c1, n)
Vector addition with scalar multiplication (multiplication on second argument)
Definition math.f90:672
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Implements smoothers for use with TreeAMG matrix vector product.
subroutine amg_jacobi_init(this, n, lvl, max_iter)
Initialization of Jacobi (this is expensive...)
subroutine amg_cheby_power(this, amg, n)
Power method to approximate largest eigenvalue.
subroutine amg_device_cheby_power(this, amg, n)
Power method to approximate largest eigenvalue.
subroutine amg_cheby_init(this, n, lvl, max_iter)
Initialization of chebyshev.
subroutine amg_jacobi_diag(this, amg, n)
SAMPLE MATRIX DIAGONAL VALUES (DO NOT USE, EXPENSIVE)
subroutine amg_jacobi_solve(this, x, f, n, amg, niter)
Jacobi smoother.
subroutine amg_smoo_monitor(lvl, smoo)
subroutine amg_device_cheby_solve(this, x, f, x_d, f_d, n, amg, niter)
Chebyshev smoother From Saad's iterative methods textbook.
subroutine amg_cheby_solve(this, x, f, n, amg, niter)
Chebyshev smoother From Saad's iterative methods textbook.
subroutine amg_cheby_monitor(lvl, lam)
Implements utilities for the TreeAMG hierarchy structure.
subroutine, public tamg_sample_matrix_val(val, amg, lvl, i, j)
Sample the values in a matix (expensive, use with caution)
Implements the base type for TreeAMG hierarchy structure.
Definition tree_amg.f90:34
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:47
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Type for a TreeAMG hierarchy.
Definition tree_amg.f90:94
Type for Chebyshev iteration using TreeAMG matvec.
Type for Chebyshev iteration using TreeAMG matvec.