Loading [MathJax]/extensions/tex2jax.js
Neko 0.9.99
A portable framework for high-order spectral element flow simulations
All Classes Namespaces Files Functions Variables Typedefs Enumerator Macros Pages
tree_amg_multigrid.f90
Go to the documentation of this file.
1! Copyright (c) 2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
45 use num_types, only: rp
46 use utils, only : neko_error
47 use math, only : add2
50 use comm
51 use coefs, only : coef_t
52 use mesh, only : mesh_t
53 use space, only : space_t
54 use ax_product, only: ax_t
55 use bc_list, only : bc_list_t
56 use gather_scatter, only : gs_t, gs_op_add
61 use logger, only : neko_log, log_size
64 use, intrinsic :: iso_c_binding
65 implicit none
66 private
67
68 type :: tamg_wrk_t
69 real(kind=rp), allocatable :: r(:)
70 real(kind=rp), allocatable :: rc(:)
71 real(kind=rp), allocatable :: tmp(:)
72 type(c_ptr) :: r_d = c_null_ptr
73 type(c_ptr) :: rc_d = c_null_ptr
74 type(c_ptr) :: tmp_d = c_null_ptr
75 end type tamg_wrk_t
76
78 type, public :: tamg_solver_t
79 type(tamg_hierarchy_t), allocatable :: amg
80 type(amg_cheby_t), allocatable :: smoo(:)
81 type(tamg_wrk_t), allocatable :: wrk(:)
82 !type(amg_jacobi_t), allocatable :: jsmoo(:)
83 integer :: nlvls
84 integer :: max_iter
85 contains
86 procedure, pass(this) :: init => tamg_mg_init
87 procedure, pass(this) :: solve => tamg_mg_solve
88 procedure, pass(this) :: device_solve => tamg_mg_solve_device
89 end type tamg_solver_t
90
91contains
92
102 subroutine tamg_mg_init(this, ax, Xh, coef, msh, gs_h, nlvls_in, blst, max_iter)
103 class(tamg_solver_t), intent(inout), target :: this
104 class(ax_t), target, intent(in) :: ax
105 type(space_t),target, intent(in) :: Xh
106 type(coef_t), target, intent(in) :: coef
107 type(mesh_t), target, intent(in) :: msh
108 type(gs_t), target, intent(in) :: gs_h
109 type(bc_list_t), target, intent(in) :: blst
110 integer, intent(in) :: nlvls_in
111 integer, intent(in) :: max_iter
112 integer :: nlvls, lvl, n, cheby_degree, env_len, mlvl, target_num_aggs
113 integer, allocatable :: agg_nhbr(:,:), asdf(:,:)
114 character(len=255) :: env_cheby_degree, env_mlvl
115 character(len=LOG_SIZE) :: log_buf
116
117 call neko_log%section('AMG')
118
119 call get_environment_variable("NEKO_TAMG_MAX_LVL", &
120 env_mlvl, env_len)
121 if (env_len .eq. 0) then
122 !yeah...
123 nlvls = nlvls_in
124 else
125 read(env_mlvl(1:env_len), *) mlvl
126 nlvls = mlvl
127 end if
128
129 write(log_buf, '(A28,I2,A8)') 'Creating AMG hierarchy with', nlvls, 'levels.'
130 call neko_log%message(log_buf)
131
132 allocate( this%amg )
133 call this%amg%init(ax, xh, coef, msh, gs_h, nlvls, blst)
134
136 call aggregate_finest_level(this%amg, xh%lx, xh%ly, xh%lz, msh%nelv)
137
139 allocate( agg_nhbr, source=msh%facet_neigh )
140 do mlvl = 2, nlvls-1
141 target_num_aggs = this%amg%lvl(mlvl-1)%nnodes / 8
142 call print_preagg_info( mlvl, target_num_aggs)
143 if ( target_num_aggs .lt. 4 ) then
144 call neko_error("TAMG: Too many levels. Not enough DOFs for coarsest grid.")
145 end if
146 call aggregate_greedy( this%amg, mlvl, target_num_aggs, agg_nhbr, asdf)
147 agg_nhbr = asdf
148 deallocate( asdf )
149 end do
150 deallocate( agg_nhbr )
151
153 call aggregate_end(this%amg, nlvls)
154
155 this%max_iter = max_iter
156
157 this%nlvls = this%amg%nlvls!TODO: read from parameter
158 if (this%nlvls .gt. this%amg%nlvls) then
159 call neko_error("Requested number multigrid levels is greater than the initialized AMG levels")
160 end if
161
162 call get_environment_variable("NEKO_TAMG_CHEBY_DEGREE", &
163 env_cheby_degree, env_len)
164 if (env_len .eq. 0) then
165 cheby_degree = 10
166 else
167 read(env_cheby_degree(1:env_len), *) cheby_degree
168 end if
169
170 allocate(this%smoo(0:(nlvls)))
171 do lvl = 0, nlvls-1
172 n = this%amg%lvl(lvl+1)%fine_lvl_dofs
173 call this%smoo(lvl)%init(n ,lvl, cheby_degree)
174 end do
175
177 allocate(this%wrk(nlvls))
178 do lvl = 1, nlvls
179 n = this%amg%lvl(lvl)%fine_lvl_dofs
180 allocate( this%wrk(lvl)%r(n) )
181 allocate( this%wrk(lvl)%rc(n) )
182 allocate( this%wrk(lvl)%tmp(n) )
183 if (neko_bcknd_device .eq. 1) then
184 call device_map( this%wrk(lvl)%r, this%wrk(lvl)%r_d, n)
185 call device_map( this%wrk(lvl)%rc, this%wrk(lvl)%rc_d, n)
186 call device_map( this%wrk(lvl)%tmp, this%wrk(lvl)%tmp_d, n)
187 end if
188 end do
189
190 !allocate(this%jsmoo(0:(nlvls)))
191 !do lvl = 0, nlvls-1
192 ! n = this%amg%lvl(lvl+1)%fine_lvl_dofs
193 ! call this%jsmoo(lvl)%init(n ,lvl, cheby_degree)
194 !end do
195
196 call fill_lvl_map(this%amg)
197
198 call neko_log%end_section()
199
200 end subroutine tamg_mg_init
201
202
207 subroutine tamg_mg_solve(this, z, r, n)
208 integer, intent(in) :: n
209 class(tamg_solver_t), intent(inout) :: this
210 real(kind=rp), dimension(n), intent(inout) :: z
211 real(kind=rp), dimension(n), intent(inout) :: r
212 integer :: iter, max_iter
213
214 max_iter = this%max_iter
215
216 ! Zero out the initial guess becuase we do not handle null spaces very well...
217 z = 0d0
218
219 ! Call the amg cycle
220 do iter = 1, max_iter
221 call tamg_mg_cycle(z, r, n, 0, this%amg, this)
222 end do
223 end subroutine tamg_mg_solve
224
229 subroutine tamg_mg_solve_device(this, z, r, z_d, r_d, n)
230 integer, intent(in) :: n
231 class(tamg_solver_t), intent(inout) :: this
232 real(kind=rp), dimension(n), intent(inout) :: z
233 real(kind=rp), dimension(n), intent(inout) :: r
234 type(c_ptr) :: z_d
235 type(c_ptr) :: r_d
236 integer :: iter, max_iter
237
238 max_iter = this%max_iter
239
240 ! Zero out the initial guess becuase we do not handle null spaces very well...
241 call device_rzero(z_d, n)
242
243 ! Call the amg cycle
244 do iter = 1, max_iter
245 call tamg_mg_cycle_d(z, r, z_d, r_d, n, 0, this%amg, this)
246 end do
247 end subroutine tamg_mg_solve_device
248
256 recursive subroutine tamg_mg_cycle(x, b, n, lvl, amg, mgstuff)
257 integer, intent(in) :: n
258 real(kind=rp), intent(inout) :: x(n)
259 real(kind=rp), intent(inout) :: b(n)
260 type(tamg_hierarchy_t), intent(inout) :: amg
261 type(tamg_solver_t), intent(inout) :: mgstuff
262 integer, intent(in) :: lvl
263 real(kind=rp) :: r(n)
264 real(kind=rp) :: rc(n)
265 real(kind=rp) :: tmp(n)
266 integer :: iter, num_iter
267 integer :: max_lvl
268 integer :: i, cyt
269
270 r = 0d0
271 rc = 0d0
272 tmp = 0d0
273
274 max_lvl = mgstuff%nlvls-1
275
276 !call calc_resid(r,x,b,amg,lvl,n)!> TODO: for debug
277 !print *, "LVL:",lvl, "PRE RESID:", sqrt(glsc2(r, r, n))
281 call mgstuff%smoo(lvl)%solve(x,b, n, amg)
282 !call mgstuff%jsmoo(lvl)%solve(x,b, n, amg)
283 if (lvl .eq. max_lvl) then
284 return
285 end if
286
290 call calc_resid(r,x,b,amg,lvl,n)
291
295 if (lvl .eq. 0) then
296 call average_duplicates(r,amg,lvl,n)
297 end if
298 call amg%interp_f2c(rc, r, lvl+1)
299
303 tmp = 0d0
304 call tamg_mg_cycle(tmp, rc, amg%lvl(lvl+1)%nnodes, lvl+1, amg, mgstuff)
305
309 call amg%interp_c2f(r, tmp, lvl+1)
310 if (lvl .eq. 0) then
311 call average_duplicates(r,amg,lvl,n)
312 end if
313
317 call add2(x, r, n)
318
322 call mgstuff%smoo(lvl)%solve(x,b, n, amg)
323 !call mgstuff%jsmoo(lvl)%solve(x,b, n, amg)
324
328 !call calc_resid(r,x,b,amg,lvl,n)!> TODO: for debug
329 !print *, "LVL:",lvl, "POST RESID:", sqrt(glsc2(r, r, n))
330 end subroutine tamg_mg_cycle
331
339 recursive subroutine tamg_mg_cycle_d(x, b, x_d, b_d, n, lvl, amg, mgstuff)
340 integer, intent(in) :: n
341 real(kind=rp), intent(inout) :: x(n)
342 real(kind=rp), intent(inout) :: b(n)
343 type(c_ptr) :: x_d
344 type(c_ptr) :: b_d
345 type(tamg_hierarchy_t), intent(inout) :: amg
346 type(tamg_solver_t), intent(inout) :: mgstuff
347 integer, intent(in) :: lvl
348 integer :: iter, num_iter
349 integer :: max_lvl
350 integer :: i, cyt
351 max_lvl = mgstuff%nlvls-1
355 call mgstuff%smoo(lvl)%device_solve(x, b, x_d, b_d, n, amg)
356 if (lvl .eq. max_lvl) then
357 return
358 end if
359 associate( r => mgstuff%wrk(lvl+1)%r, r_d => mgstuff%wrk(lvl+1)%r_d, &
360 rc => mgstuff%wrk(lvl+1)%rc, rc_d => mgstuff%wrk(lvl+1)%rc_d, &
361 tmp => mgstuff%wrk(lvl+1)%tmp, tmp_d => mgstuff%wrk(lvl+1)%tmp_d )
365 call amg%device_matvec(r, x, r_d, x_d, lvl)
366 call device_sub3(r_d, b_d, r_d, n)! if (lvl .eq. 0) then! call amg%gs_h%op(r, n, GS_OP_ADD)! call device_col2(r_d, amg%coef%mult_d, n)! end if
374 call amg%interp_f2c_d(rc_d, r_d, lvl+1)
378 call device_rzero(tmp_d, n)
379 call tamg_mg_cycle_d(tmp, rc, tmp_d, rc_d, amg%lvl(lvl+1)%nnodes, lvl+1, amg, mgstuff)
383 call amg%interp_c2f_d(r_d, tmp_d, lvl+1)
384 if (lvl .eq. 0) then
385 call amg%gs_h%op(r, n, gs_op_add)
386 call device_col2(r_d, amg%coef%mult_d, n)
387 end if
391 call device_add2(x_d, r_d, n)
395 call mgstuff%smoo(lvl)%device_solve(x, b, x_d, b_d, n, amg)
396 end associate
397 end subroutine tamg_mg_cycle_d
398
399
407 subroutine calc_resid(r, x, b, amg, lvl, n)
408 integer, intent(in) :: n
409 real(kind=rp), intent(inout) :: r(n)
410 real(kind=rp), intent(inout) :: x(n)
411 real(kind=rp), intent(inout) :: b(n)
412 type(tamg_hierarchy_t), intent(inout) :: amg
413 integer, intent(in) :: lvl
414 integer :: i
415 r = 0d0
416 call amg%matvec(r, x, lvl)
417 do i = 1, n
418 r(i) = b(i) - r(i)
419 end do
420 end subroutine calc_resid
421
427 subroutine average_duplicates(U, amg, lvl, n)
428 integer, intent(in) :: n
429 real(kind=rp), intent(inout) :: u(n)
430 type(tamg_hierarchy_t), intent(inout) :: amg
431 integer, intent(in) :: lvl
432 integer :: i
433 call amg%gs_h%op(u, n, gs_op_add)
434 do i = 1, n
435 u(i) = u(i) * amg%coef%mult(i,1,1,1)
436 end do
437 end subroutine average_duplicates
438
439 subroutine print_preagg_info(lvl,nagg)
440 integer, intent(in) :: lvl,nagg
441 character(len=LOG_SIZE) :: log_buf
442 !TODO: calculate min and max agg size
443 write(log_buf, '(A8,I2,A31)') '-- level',lvl,'-- Calling Greedy Aggregation'
444 call neko_log%message(log_buf)
445 write(log_buf, '(A33,I6)') 'Target Aggregates:',nagg
446 call neko_log%message(log_buf)
447 end subroutine print_preagg_info
448
449 subroutine print_resid_info(r, x, b, r_d, x_d, b_d, amg, lvl, n)
450 integer, intent(in) :: lvl, n
451 real(kind=rp), intent(inout) :: r(n)
452 real(kind=rp), intent(inout) :: x(n)
453 real(kind=rp), intent(inout) :: b(n)
454 type(c_ptr) :: r_d
455 type(c_ptr) :: x_d
456 type(c_ptr) :: b_d
457 type(tamg_hierarchy_t), intent(inout) :: amg
458 real(kind=rp) :: val
459 character(len=LOG_SIZE) :: log_buf
460
461 call amg%device_matvec(r, x, r_d, x_d, lvl)
462 call device_sub3(r_d, b_d, r_d, n)
463 val = device_glsc2(r_d, r_d, n)
464
465 write(log_buf, '(A33,I6,F12.6)') 'tAMG resid:', lvl, val
466 call neko_log%message(log_buf)
467 end subroutine print_resid_info
468
469 subroutine fill_lvl_map(amg)
470 type(tamg_hierarchy_t), intent(inout) :: amg
471 integer :: i, j, k, l, nid, n
472 do j = 1, amg%lvl(1)%nnodes
473 do k = 1, amg%lvl(1)%nodes(j)%ndofs
474 nid = amg%lvl(1)%nodes(j)%dofs(k)
475 amg%lvl(1)%map_finest2lvl(nid) = amg%lvl(1)%nodes(j)%gid
476 end do
477 end do
478 n = size(amg%lvl(1)%map_finest2lvl)
479 do l = 2, amg%nlvls
480 do i = 1, n
481 nid = amg%lvl(l-1)%map_finest2lvl(i)
482 do j = 1, amg%lvl(l)%nnodes
483 do k = 1, amg%lvl(l)%nodes(j)%ndofs
484 if (nid .eq. amg%lvl(l)%nodes(j)%dofs(k)) then
485 amg%lvl(l)%map_finest2lvl(i) = amg%lvl(l)%nodes(j)%gid
486 end if
487 end do
488 end do
489 end do
490 end do
491 if (neko_bcknd_device .eq. 1) then
492 do l = 1, amg%nlvls
493 amg%lvl(l)%map_finest2lvl(0) = n
494 call device_memcpy( amg%lvl(l)%map_finest2lvl, amg%lvl(l)%map_finest2lvl_d, n, host_to_device, .true.)
495 call device_memcpy( amg%lvl(l)%map_f2c, amg%lvl(l)%map_f2c_d, amg%lvl(l)%fine_lvl_dofs+1, host_to_device, .true.)
496 end do
497 end if
498 end subroutine fill_lvl_map
499end module tree_amg_multigrid
Map a Fortran array to a device (allocate and associate)
Definition device.F90:71
Copy data between host and device (or device and device)
Definition device.F90:65
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
subroutine, public device_add2(a_d, b_d, n)
Vector addition .
subroutine, public device_col2(a_d, b_d, n)
Vector multiplication .
subroutine, public device_rzero(a_d, n)
Zero a real vector.
real(kind=rp) function, public device_glsc2(a_d, b_d, n)
Weighted inner product .
subroutine, public device_sub3(a_d, b_d, c_d, n)
Vector subtraction .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:46
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:200
Gather-scatter.
Logging routines.
Definition log.f90:34
type(log_t), public neko_log
Global log stream.
Definition log.f90:65
integer, parameter, public log_size
Definition log.f90:42
Definition math.f90:60
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:586
Defines a mesh.
Definition mesh.f90:34
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a function space.
Definition space.f90:34
Implements an aggregation for TreeAMG hierarchy structure.
subroutine aggregate_end(tamg, lvl_id)
Aggregate all dofs to a single point to form a tree-like structure.
subroutine aggregate_greedy(tamg, lvl_id, max_aggs, facet_neigh, agg_nhbr)
Aggregates dofs based on adjacent dofs.
subroutine aggregate_finest_level(tamg, lx, ly, lz, ne)
Aggregaiton on finest level Aggregates all dofs in an element into a single aggregate.
Implements multigrid using the TreeAMG hierarchy structure. USE:
subroutine calc_resid(r, x, b, amg, lvl, n)
Wrapper function to calculate residyal.
recursive subroutine tamg_mg_cycle(x, b, n, lvl, amg, mgstuff)
Recrsive multigrid cycle for the TreeAMG solver object.
subroutine tamg_mg_solve_device(this, z, r, z_d, r_d, n)
Solver function for the TreeAMG solver object.
subroutine tamg_mg_init(this, ax, xh, coef, msh, gs_h, nlvls_in, blst, max_iter)
Initialization of the TreeAMG multigrid solver.
subroutine print_preagg_info(lvl, nagg)
subroutine average_duplicates(u, amg, lvl, n)
Wrapper function to gather scatter and average the duplicates.
subroutine fill_lvl_map(amg)
subroutine print_resid_info(r, x, b, r_d, x_d, b_d, amg, lvl, n)
subroutine tamg_mg_solve(this, z, r, n)
Solver function for the TreeAMG solver object.
recursive subroutine tamg_mg_cycle_d(x, b, x_d, b_d, n, lvl, amg, mgstuff)
Recrsive multigrid cycle for the TreeAMG solver object on device.
Implements smoothers for use with TreeAMG matrix vector product.
Implements the base type for TreeAMG hierarchy structure.
Definition tree_amg.f90:34
subroutine, public tamg_node_init(node, gid, ndofs)
Initialization of a TreeAMG tree node.
Definition tree_amg.f90:203
subroutine, public tamg_lvl_init(tamg_lvl, lvl, nnodes, ndofs)
Initialization of a TreeAMG level.
Definition tree_amg.f90:171
Utilities.
Definition utils.f90:35
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:47
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
The function space for the SEM solution fields.
Definition space.f90:62
Type for a TreeAMG hierarchy.
Definition tree_amg.f90:94
Type for the TreeAMG solver.
Type for Chebyshev iteration using TreeAMG matvec.