Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
tree_amg.f90
Go to the documentation of this file.
1! Copyright (c) 2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use num_types, only : rp
36 use utils, only : neko_error
37 use math, only : rzero, col2
40 use coefs, only : coef_t
41 use mesh, only : mesh_t
42 use space, only : space_t
43 use ax_product, only: ax_t
44 use bc_list, only: bc_list_t
45 use gather_scatter, only : gs_t, gs_op_add
48 use, intrinsic :: iso_c_binding
49 implicit none
50 private
51
53 type, private :: tamg_node_t
54 logical :: isleaf = .true.
55 integer :: gid = -1
56 integer :: lvl = -1
57 integer :: ndofs = 0
58 integer, allocatable :: dofs(:)
59 real(kind=rp) :: xyz(3)
60 real(kind=rp), allocatable :: interp_r(:)
61 real(kind=rp), allocatable :: interp_p(:)
62 end type tamg_node_t
63
65 type, private :: tamg_lvl_t
66 integer :: lvl = -1
67 integer :: nnodes = 0
68 type(tamg_node_t), allocatable :: nodes(:)
69 integer :: fine_lvl_dofs = 0
70 real(kind=rp), allocatable :: wrk_in(:)
71 type(c_ptr) :: wrk_in_d = c_null_ptr
72 real(kind=rp), allocatable :: wrk_out(:)
73 type(c_ptr) :: wrk_out_d = c_null_ptr
74 integer, allocatable :: map_finest2lvl(:)
75 type(c_ptr) :: map_finest2lvl_d = c_null_ptr
76 !--!
77 integer, allocatable :: nodes_ptr(:)
78 type(c_ptr) :: nodes_ptr_d = c_null_ptr
79 integer, allocatable :: nodes_gid(:)
80 type(c_ptr) :: nodes_gid_d = c_null_ptr
81 integer, allocatable :: nodes_dofs(:)
82 type(c_ptr) :: nodes_dofs_d = c_null_ptr
83 integer, allocatable :: map_f2c(:)
84 type(c_ptr) :: map_f2c_d = c_null_ptr
85 ! could make another array of the same size of nodes_dofs
86 ! that stores the parent node gid information
87 ! (similar to nodes_gid that stores the gid of each node)
88 ! then some loops can be simplified to a single loop
89 ! of len(nodes_dofs) instead of going through each node
90 ! and looping through nodes_ptr(i) to nodes_ptr(i+1)-1
91 end type tamg_lvl_t
92
94 type, public :: tamg_hierarchy_t
96 integer :: nlvls
98 type(tamg_lvl_t), allocatable :: lvl(:)
99
101 class(ax_t), pointer :: ax
102 type(mesh_t), pointer :: msh
103 type(space_t), pointer :: xh
104 type(coef_t), pointer :: coef
105 type(gs_t), pointer :: gs_h
106 type(bc_list_t), pointer :: blst
107
108 contains
109 procedure, pass(this) :: init => tamg_init
110 procedure, pass(this) :: matvec => tamg_matvec
111 procedure, pass(this) :: matvec_impl => tamg_matvec_impl
112 procedure, pass(this) :: interp_f2c => tamg_restriction_operator
113 procedure, pass(this) :: interp_c2f => tamg_prolongation_operator
114 procedure, pass(this) :: interp_f2c_d => tamg_device_restriction_operator
115 procedure, pass(this) :: interp_c2f_d => tamg_device_prolongation_operator
116 procedure, pass(this) :: device_matvec => tamg_device_matvec_flat_impl
117 end type tamg_hierarchy_t
118
120
121contains
122
131 subroutine tamg_init(this, ax, Xh, coef, msh, gs_h, nlvls, blst)
132 class(tamg_hierarchy_t), target, intent(inout) :: this
133 class(ax_t), target, intent(in) :: ax
134 type(space_t),target, intent(in) :: Xh
135 type(coef_t), target, intent(in) :: coef
136 type(mesh_t), target, intent(in) :: msh
137 type(gs_t), target, intent(in) :: gs_h
138 integer, intent(in) :: nlvls
139 type(bc_list_t), target, intent(in) :: blst
140 integer :: i, n
141
142 this%ax => ax
143 this%msh => msh
144 this%Xh => xh
145 this%coef => coef
146 this%gs_h => gs_h
147 this%blst => blst
148
149 if (nlvls .lt. 2) then
150 call neko_error("Need to request at least two multigrid levels.")
151 end if
152
153 this%nlvls = nlvls
154 allocate( this%lvl(this%nlvls) )
155
156 do i = 1, nlvls
157 allocate( this%lvl(i)%map_finest2lvl( 0:coef%dof%size() ))
158 if (neko_bcknd_device .eq. 1) then
159 call device_map(this%lvl(i)%map_finest2lvl, this%lvl(i)%map_finest2lvl_d, coef%dof%size()+1)
160 end if
161 end do
162
163 end subroutine tamg_init
164
170 subroutine tamg_lvl_init(tamg_lvl, lvl, nnodes, ndofs)
171 type(tamg_lvl_t), intent(inout) :: tamg_lvl
172 integer, intent(in) :: lvl
173 integer, intent(in) :: nnodes
174 integer, intent(in) :: ndofs
175
176 tamg_lvl%lvl = lvl
177 tamg_lvl%nnodes = nnodes
178 allocate( tamg_lvl%nodes(tamg_lvl%nnodes) )
179 allocate( tamg_lvl%nodes_ptr(tamg_lvl%nnodes+1) )
180 allocate( tamg_lvl%nodes_gid(tamg_lvl%nnodes) )
181 allocate( tamg_lvl%nodes_dofs(ndofs) )
182 allocate( tamg_lvl%map_f2c(0:ndofs) )
183 if (neko_bcknd_device .eq. 1) then
184 call device_map(tamg_lvl%map_f2c, tamg_lvl%map_f2c_d, ndofs+1)
185 end if
186
187 tamg_lvl%fine_lvl_dofs = ndofs
188 allocate( tamg_lvl%wrk_in( ndofs ) )
189 allocate( tamg_lvl%wrk_out( ndofs ) )
190 if (neko_bcknd_device .eq. 1) then
191 call device_map(tamg_lvl%wrk_in, tamg_lvl%wrk_in_d, ndofs)
192 call device_cfill(tamg_lvl%wrk_in_d, 0.0_rp, ndofs)
193 call device_map(tamg_lvl%wrk_out, tamg_lvl%wrk_out_d, ndofs)
194 call device_cfill(tamg_lvl%wrk_out_d, 0.0_rp, ndofs)
195 end if
196 end subroutine tamg_lvl_init
197
202 subroutine tamg_node_init(node, gid, ndofs)
203 type(tamg_node_t), intent(inout) :: node
204 integer, intent(in) :: gid
205 integer, intent(in) :: ndofs
206
207 node%gid = gid
208 node%ndofs = ndofs
209 allocate( node%dofs( node%ndofs) )
210 node%dofs = -1
211 allocate( node%interp_r( node%ndofs) )
212 allocate( node%interp_p( node%ndofs) )
213 node%interp_r = 1.0_rp
214 node%interp_p = 1.0_rp
215 end subroutine tamg_node_init
216
223 recursive subroutine tamg_matvec(this, vec_out, vec_in, lvl_out)
224 class(tamg_hierarchy_t), intent(inout) :: this
225 real(kind=rp), intent(inout) :: vec_out(:)
226 real(kind=rp), intent(inout) :: vec_in(:)
227 integer, intent(in) :: lvl_out
228 integer :: i, n, e
229 !call this%matvec_impl(vec_out, vec_in, this%nlvls, lvl_out)
230 call tamg_matvec_flat_impl(this, vec_out, vec_in, this%nlvls, lvl_out)
231 end subroutine tamg_matvec
232
240 recursive subroutine tamg_matvec_impl(this, vec_out, vec_in, lvl, lvl_out)
241 class(tamg_hierarchy_t), intent(inout) :: this
242 real(kind=rp), intent(inout) :: vec_out(:)
243 real(kind=rp), intent(inout) :: vec_in(:)
244 integer, intent(in) :: lvl
245 integer, intent(in) :: lvl_out
246 integer :: i, n, e
247
248 if (lvl .eq. 0) then
250 n = size(vec_in)
252 call this%gs_h%op(vec_in, n, gs_op_add)
253 call col2( vec_in, this%coef%mult, n)
254
255 call this%ax%compute(vec_out, vec_in, this%coef, this%msh, this%Xh)
256 call this%gs_h%op(vec_out, n, gs_op_add)
257 call this%blst%apply(vec_out, n)
258
259 if (lvl_out .ne. 0) then
260 call col2(vec_out, this%coef%mult, n)
261 end if
263 else
264 if (lvl_out .ge. lvl) then
267 associate( wrk_in => this%lvl(lvl)%wrk_in, wrk_out => this%lvl(lvl)%wrk_out)
268 n = this%lvl(lvl)%fine_lvl_dofs
269 call rzero(wrk_in, n)
270 call rzero(vec_out, this%lvl(lvl)%nnodes)
271 do n = 1, this%lvl(lvl)%nnodes
272 associate(node => this%lvl(lvl)%nodes(n))
273 do i = 1, node%ndofs
274 wrk_in( node%dofs(i) ) = wrk_in( node%dofs(i) ) + vec_in( node%gid ) * node%interp_p( i )
275 end do
276 end associate
277 end do
278
279 call this%matvec_impl(wrk_out, wrk_in, lvl-1, lvl_out)
280
282 do n = 1, this%lvl(lvl)%nnodes
283 associate(node => this%lvl(lvl)%nodes(n))
284 do i = 1, node%ndofs
285 vec_out( node%gid ) = vec_out(node%gid ) + wrk_out( node%dofs(i) ) * node%interp_r( i )
286 end do
287 end associate
288 end do
289 end associate
290 else if (lvl_out .lt. lvl) then
292 call this%matvec_impl(vec_out, vec_in, lvl-1, lvl_out)
293 else
294 call neko_error("TAMG: matvec level numbering problem.")
295 end if
296 end if
297 end subroutine tamg_matvec_impl
298
299
301 recursive subroutine tamg_matvec_flat_impl(this, vec_out, vec_in, lvl_blah, lvl_out)
302 class(tamg_hierarchy_t), intent(inout) :: this
303 real(kind=rp), intent(inout) :: vec_out(:)
304 real(kind=rp), intent(inout) :: vec_in(:)
305 integer, intent(in) :: lvl_blah
306 integer, intent(in) :: lvl_out
307 integer :: i, n, cdof, lvl
308
309 lvl = lvl_out
310 n = this%lvl(1)%fine_lvl_dofs
311 if (lvl .eq. 0) then
312 call this%ax%compute(vec_out, vec_in, this%coef, this%msh, this%Xh)
313 call this%gs_h%op(vec_out, n, gs_op_add)
314 call this%blst%apply(vec_out, n)
315 else
316 associate( wrk_in => this%lvl(1)%wrk_in, wrk_out => this%lvl(1)%wrk_out)
318 do i = 1, n
319 cdof = this%lvl(lvl)%map_finest2lvl(i)
320 wrk_in(i) = vec_in( cdof )
321 end do
322
324 call this%gs_h%op(wrk_in, n, gs_op_add)
325 call col2( wrk_in, this%coef%mult, n)
326 call this%blst%apply(wrk_in, n)
327
329 call this%ax%compute(wrk_out, wrk_in, this%coef, this%msh, this%Xh)
330 call this%gs_h%op(wrk_out, n, gs_op_add)
331 call this%blst%apply(wrk_out, n)
332
333 call col2(wrk_out, this%coef%mult, n)
334
336 call rzero(vec_out, this%lvl(lvl)%nnodes)
337 do i = 1, n
338 cdof = this%lvl(lvl)%map_finest2lvl(i)
339 vec_out(cdof) = vec_out(cdof) + wrk_out( i )
340 end do
341 end associate
342 end if
343 end subroutine tamg_matvec_flat_impl
344
345
346
351 subroutine tamg_restriction_operator(this, vec_out, vec_in, lvl)
352 class(tamg_hierarchy_t), intent(inout) :: this
353 real(kind=rp), intent(inout) :: vec_out(:)
354 real(kind=rp), intent(inout) :: vec_in(:)
355 integer, intent(in) :: lvl
356 integer :: i, n, node_start, node_end, node_id
357
358 vec_out = 0d0
359 if (lvl-1 .eq. 0) then
360 call col2(vec_in, this%coef%mult, this%lvl(lvl)%fine_lvl_dofs)
361 end if
362 do n = 1, this%lvl(lvl)%nnodes
363 associate(node => this%lvl(lvl)%nodes(n))
364 do i = 1, node%ndofs
365 vec_out( node%gid ) = vec_out( node%gid ) + vec_in( node%dofs(i) ) * node%interp_r( i )
366 end do
367 end associate
368 end do
369 end subroutine tamg_restriction_operator
370
375 subroutine tamg_prolongation_operator(this, vec_out, vec_in, lvl)
376 class(tamg_hierarchy_t), intent(inout) :: this
377 real(kind=rp), intent(inout) :: vec_out(:)
378 real(kind=rp), intent(inout) :: vec_in(:)
379 integer, intent(in) :: lvl
380 integer :: i, n, node_start, node_end, node_id
381
382 vec_out = 0d0
383 do n = 1, this%lvl(lvl)%nnodes
384 associate(node => this%lvl(lvl)%nodes(n))
385 do i = 1, node%ndofs
386 vec_out( node%dofs(i) ) = vec_out( node%dofs(i) ) + vec_in( node%gid ) * node%interp_p( i )
387 end do
388 end associate
389 end do
390 if (lvl-1 .eq. 0) then
391 call this%gs_h%op(vec_out, this%lvl(lvl)%fine_lvl_dofs, gs_op_add)
392 call col2(vec_out, this%coef%mult, this%lvl(lvl)%fine_lvl_dofs)
393 call this%blst%apply(vec_out, n)
394 end if
395 end subroutine tamg_prolongation_operator
396
397
398 subroutine tamg_device_matvec_flat_impl(this, vec_out, vec_in, vec_out_d, vec_in_d, lvl_out)
399 class(tamg_hierarchy_t), intent(inout) :: this
400 real(kind=rp), intent(inout) :: vec_out(:)
401 real(kind=rp), intent(inout) :: vec_in(:)
402 type(c_ptr) :: vec_out_d
403 type(c_ptr) :: vec_in_d
404 integer, intent(in) :: lvl_out
405 integer :: i, n, cdof, lvl
406
407 lvl = lvl_out
408 n = this%lvl(1)%fine_lvl_dofs
409 if (lvl .eq. 0) then
410 call this%ax%compute(vec_out, vec_in, this%coef, this%msh, this%Xh)
411 call this%gs_h%op(vec_out, n, gs_op_add, glb_cmd_event)
412 call device_stream_wait_event(glb_cmd_queue, glb_cmd_event, 0)
413 call this%blst%apply(vec_out, n)
414 else
415
416 associate( wrk_in_d => this%lvl(1)%wrk_in_d, wrk_out_d => this%lvl(1)%wrk_out_d)
418 call device_masked_gather_copy_0(wrk_in_d, vec_in_d, this%lvl(lvl)%map_finest2lvl_d, this%lvl(lvl)%nnodes, n)
420 call this%gs_h%op(this%lvl(1)%wrk_in, n, gs_op_add, glb_cmd_event)
421 call device_stream_wait_event(glb_cmd_queue, glb_cmd_event, 0)
422 call device_col2( wrk_in_d, this%coef%mult_d, n)
423 call this%blst%apply(this%lvl(1)%wrk_in, n)
424
426 call this%ax%compute(this%lvl(1)%wrk_out, this%lvl(1)%wrk_in, this%coef, this%msh, this%Xh)
427 call this%gs_h%op(this%lvl(1)%wrk_out, n, gs_op_add, glb_cmd_event)
428 call device_stream_wait_event(glb_cmd_queue, glb_cmd_event, 0)
429 call this%blst%apply(this%lvl(1)%wrk_out, n)
430
431 call device_col2( wrk_out_d, this%coef%mult_d, n)
432
434 call device_rzero(vec_out_d, this%lvl(lvl)%nnodes)
435 call device_masked_atomic_reduction_0(vec_out_d, wrk_out_d, this%lvl(lvl)%map_finest2lvl_d, this%lvl(lvl)%nnodes, n)
436 end associate
437
438 end if
439 end subroutine tamg_device_matvec_flat_impl
440
441 subroutine tamg_device_restriction_operator(this, vec_out_d, vec_in_d, lvl)
442 class(tamg_hierarchy_t), intent(inout) :: this
443 type(c_ptr) :: vec_out_d
444 type(c_ptr) :: vec_in_d
445 integer, intent(in) :: lvl
446 integer :: i, n, m
447 n = this%lvl(lvl)%nnodes
448 m = this%lvl(lvl)%fine_lvl_dofs
449 if (lvl-1 .eq. 0) then
450 call device_col2(vec_in_d, this%coef%mult_d, m)
451 end if
452 call device_rzero(vec_out_d, n)
453 call device_masked_atomic_reduction_0(vec_out_d, vec_in_d, this%lvl(lvl)%map_f2c_d, n, m)
455
456 subroutine tamg_device_prolongation_operator(this, vec_out_d, vec_in_d, lvl, vec_out)
457 class(tamg_hierarchy_t), intent(inout) :: this
458 real(kind=rp), intent(inout) :: vec_out(:)
459 type(c_ptr) :: vec_out_d
460 type(c_ptr) :: vec_in_d
461 integer, intent(in) :: lvl
462 integer :: i, n, m
463 n = this%lvl(lvl)%nnodes
464 m = this%lvl(lvl)%fine_lvl_dofs
465 call device_masked_gather_copy_0(vec_out_d, vec_in_d, this%lvl(lvl)%map_f2c_d, n, m)
466 if (lvl-1 .eq. 0) then
467 call this%gs_h%op(vec_out, m, gs_op_add, glb_cmd_event)
468 call device_stream_wait_event(glb_cmd_queue, glb_cmd_event, 0)
469 call device_col2( vec_out_d, this%coef%mult_d, m)
470 call this%blst%apply( vec_out, n)
471 end if
473
474end module tree_amg
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Coefficients.
Definition coef.f90:34
subroutine, public device_masked_atomic_reduction_0(a_d, b_d, mask_d, n, n_mask, strm)
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_masked_gather_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
Gather a masked vector .
subroutine, public device_cfill(a_d, c, n, strm)
Set all elements to a constant c .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
Definition device.F90:1208
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Definition device.F90:51
type(c_ptr), bind(C), public glb_cmd_event
Event for the global command queue.
Definition device.F90:62
Gather-scatter.
Definition math.f90:60
subroutine, public col2(a, b, n)
Vector multiplication .
Definition math.f90:854
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205
Defines a mesh.
Definition mesh.f90:34
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a function space.
Definition space.f90:34
Implements the base type for TreeAMG hierarchy structure.
Definition tree_amg.f90:34
subroutine tamg_device_matvec_flat_impl(this, vec_out, vec_in, vec_out_d, vec_in_d, lvl_out)
Definition tree_amg.f90:399
subroutine tamg_device_prolongation_operator(this, vec_out_d, vec_in_d, lvl, vec_out)
Definition tree_amg.f90:457
recursive subroutine tamg_matvec_impl(this, vec_out, vec_in, lvl, lvl_out)
Matrix vector product using the TreeAMG hierarchy structure b=Ax done as vec_out = A * vec_in This is...
Definition tree_amg.f90:241
subroutine, public tamg_node_init(node, gid, ndofs)
Initialization of a TreeAMG tree node.
Definition tree_amg.f90:203
subroutine tamg_restriction_operator(this, vec_out, vec_in, lvl)
Restriction operator for TreeAMG. vec_out = R * vec_in.
Definition tree_amg.f90:352
subroutine tamg_prolongation_operator(this, vec_out, vec_in, lvl)
Prolongation operator for TreeAMG. vec_out = P * vec_in.
Definition tree_amg.f90:376
subroutine, public tamg_lvl_init(tamg_lvl, lvl, nnodes, ndofs)
Initialization of a TreeAMG level.
Definition tree_amg.f90:171
recursive subroutine tamg_matvec(this, vec_out, vec_in, lvl_out)
Wrapper for matrix vector product using the TreeAMG hierarchy structure b=Ax done as vec_out = A * ve...
Definition tree_amg.f90:224
subroutine tamg_device_restriction_operator(this, vec_out_d, vec_in_d, lvl)
Definition tree_amg.f90:442
subroutine tamg_init(this, ax, xh, coef, msh, gs_h, nlvls, blst)
Initialization of TreeAMG hierarchy.
Definition tree_amg.f90:132
recursive subroutine tamg_matvec_flat_impl(this, vec_out, vec_in, lvl_blah, lvl_out)
Ignore this. For piecewise constant, can create index map directly to finest level.
Definition tree_amg.f90:302
Utilities.
Definition utils.f90:35
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
The function space for the SEM solution fields.
Definition space.f90:63
Type for a TreeAMG hierarchy.
Definition tree_amg.f90:94
Type for storing TreeAMG level information.
Definition tree_amg.f90:65
Type for storing TreeAMG tree node information.
Definition tree_amg.f90:53