49 use,
intrinsic :: iso_c_binding
55 logical :: isleaf = .true.
59 integer,
allocatable :: dofs(:)
60 real(kind=
rp) :: xyz(3)
61 real(kind=
rp),
allocatable :: interp_r(:)
62 real(kind=
rp),
allocatable :: interp_p(:)
72 integer :: fine_lvl_dofs = 0
73 real(kind=
rp),
allocatable :: wrk_in(:)
74 type(c_ptr) :: wrk_in_d = c_null_ptr
75 real(kind=
rp),
allocatable :: wrk_out(:)
76 type(c_ptr) :: wrk_out_d = c_null_ptr
77 integer,
allocatable :: map_finest2lvl(:)
78 type(c_ptr) :: map_finest2lvl_d = c_null_ptr
80 integer,
allocatable :: map_f2c(:)
81 type(c_ptr) :: map_f2c_d = c_null_ptr
98 type(
gs_t),
pointer :: gs_h
125 subroutine tamg_init(this, ax, Xh, coef, msh, gs_h, nlvls, blst)
127 class(
ax_t),
target,
intent(in) :: ax
128 type(
space_t),
target,
intent(in) :: Xh
129 type(
coef_t),
target,
intent(in) :: coef
130 type(
mesh_t),
target,
intent(in) :: msh
131 type(
gs_t),
target,
intent(in) :: gs_h
132 integer,
intent(in) :: nlvls
133 type(
bc_list_t),
target,
intent(in) :: blst
143 if (nlvls .lt. 2)
then
144 call neko_error(
"Need to request at least two multigrid levels.")
148 allocate( this%lvl(this%nlvls) )
151 allocate( this%lvl(i)%map_finest2lvl( 0:coef%dof%size() ))
153 call device_map(this%lvl(i)%map_finest2lvl, this%lvl(i)%map_finest2lvl_d, coef%dof%size()+1)
163 if (
allocated(this%lvl))
then
167 do i = 1,
size(this%lvl)
168 call this%lvl(i)%free()
187 integer,
intent(in) :: lvl
188 integer,
intent(in) :: nnodes
189 integer,
intent(in) :: ndofs
192 tamg_lvl%nnodes = nnodes
193 allocate( tamg_lvl%nodes(tamg_lvl%nnodes) )
194 allocate( tamg_lvl%map_f2c(0:ndofs) )
196 call device_map(tamg_lvl%map_f2c, tamg_lvl%map_f2c_d, ndofs+1)
199 tamg_lvl%fine_lvl_dofs = ndofs
200 allocate( tamg_lvl%wrk_in( ndofs ) )
201 allocate( tamg_lvl%wrk_out( ndofs ) )
203 call device_map(tamg_lvl%wrk_in, tamg_lvl%wrk_in_d, ndofs)
205 call device_map(tamg_lvl%wrk_out, tamg_lvl%wrk_out_d, ndofs)
221 if (
allocated(this%nodes))
then
222 do i = 1, this%nnodes
223 call this%nodes(i)%free()
225 deallocate(this%nodes)
227 if (
allocated(this%wrk_in))
then
228 deallocate(this%wrk_in)
230 if (
allocated(this%wrk_out))
then
231 deallocate(this%wrk_out)
233 if (
allocated(this%map_f2c))
then
234 deallocate(this%map_f2c)
236 if (
allocated(this%map_finest2lvl))
then
237 deallocate(this%map_finest2lvl)
247 this%fine_lvl_dofs = 0
256 integer,
intent(in) :: gid
257 integer,
intent(in) :: ndofs
261 allocate( node%dofs( node%ndofs) )
263 allocate( node%interp_r( node%ndofs) )
264 allocate( node%interp_p( node%ndofs) )
265 node%interp_r = 1.0_rp
266 node%interp_p = 1.0_rp
273 if (
allocated(this%dofs))
then
274 deallocate(this%dofs)
276 if (
allocated(this%interp_r))
then
277 deallocate(this%interp_r)
279 if (
allocated(this%interp_p))
then
280 deallocate(this%interp_p)
292 real(kind=
rp),
intent(inout) :: vec_out(:)
293 real(kind=
rp),
intent(inout) :: vec_in(:)
294 integer,
intent(in) :: lvl_out
309 real(kind=
rp),
intent(inout) :: vec_out(:)
310 real(kind=
rp),
intent(inout) :: vec_in(:)
311 integer,
intent(in) :: lvl
312 integer,
intent(in) :: lvl_out
319 call this%gs_h%op(vec_in, n, gs_op_add)
320 call col2( vec_in, this%coef%mult, n)
322 call this%ax%compute(vec_out, vec_in, this%coef, this%msh, this%Xh)
323 call this%gs_h%op(vec_out, n, gs_op_add)
324 call this%blst%apply(vec_out, n)
326 if (lvl_out .ne. 0)
then
327 call col2(vec_out, this%coef%mult, n)
331 if (lvl_out .ge. lvl)
then
334 associate( wrk_in => this%lvl(lvl)%wrk_in, wrk_out => this%lvl(lvl)%wrk_out)
335 n = this%lvl(lvl)%fine_lvl_dofs
336 call rzero(wrk_in, n)
337 call rzero(vec_out, this%lvl(lvl)%nnodes)
338 do n = 1, this%lvl(lvl)%nnodes
339 associate(node => this%lvl(lvl)%nodes(n))
341 wrk_in( node%dofs(i) ) = wrk_in( node%dofs(i) ) + vec_in( node%gid ) * node%interp_p( i )
346 call this%matvec_impl(wrk_out, wrk_in, lvl-1, lvl_out)
349 do n = 1, this%lvl(lvl)%nnodes
350 associate(node => this%lvl(lvl)%nodes(n))
352 vec_out( node%gid ) = vec_out(node%gid ) + wrk_out( node%dofs(i) ) * node%interp_r( i )
357 else if (lvl_out .lt. lvl)
then
359 call this%matvec_impl(vec_out, vec_in, lvl-1, lvl_out)
361 call neko_error(
"TAMG: matvec level numbering problem.")
370 real(kind=rp),
intent(inout) :: vec_out(:)
371 real(kind=rp),
intent(inout) :: vec_in(:)
372 integer,
intent(in) :: lvl_blah
373 integer,
intent(in) :: lvl_out
374 integer :: i, n, cdof, lvl
377 n = this%lvl(1)%fine_lvl_dofs
379 call this%ax%compute(vec_out, vec_in, this%coef, this%msh, this%Xh)
380 call this%gs_h%op(vec_out, n, gs_op_add)
381 call this%blst%apply(vec_out, n)
383 associate( wrk_in => this%lvl(1)%wrk_in, wrk_out => this%lvl(1)%wrk_out)
386 cdof = this%lvl(lvl)%map_finest2lvl(i)
387 wrk_in(i) = vec_in( cdof )
391 call this%gs_h%op(wrk_in, n, gs_op_add)
392 call col2( wrk_in, this%coef%mult, n)
393 call this%blst%apply(wrk_in, n)
396 call this%ax%compute(wrk_out, wrk_in, this%coef, this%msh, this%Xh)
397 call this%gs_h%op(wrk_out, n, gs_op_add)
398 call this%blst%apply(wrk_out, n)
400 call col2(wrk_out, this%coef%mult, n)
403 call rzero(vec_out, this%lvl(lvl)%nnodes)
405 cdof = this%lvl(lvl)%map_finest2lvl(i)
406 vec_out(cdof) = vec_out(cdof) + wrk_out( i )
420 real(kind=rp),
intent(inout) :: vec_out(:)
421 real(kind=rp),
intent(inout) :: vec_in(:)
422 integer,
intent(in) :: lvl
423 integer :: i, n, node_start, node_end, node_id
426 if (lvl-1 .eq. 0)
then
427 call col2(vec_in, this%coef%mult, this%lvl(lvl)%fine_lvl_dofs)
429 do n = 1, this%lvl(lvl)%nnodes
430 associate(node => this%lvl(lvl)%nodes(n))
432 vec_out( node%gid ) = vec_out( node%gid ) + vec_in( node%dofs(i) ) * node%interp_r( i )
444 real(kind=rp),
intent(inout) :: vec_out(:)
445 real(kind=rp),
intent(inout) :: vec_in(:)
446 integer,
intent(in) :: lvl
447 integer :: i, n, node_start, node_end, node_id
450 do n = 1, this%lvl(lvl)%nnodes
451 associate(node => this%lvl(lvl)%nodes(n))
453 vec_out( node%dofs(i) ) = vec_out( node%dofs(i) ) + vec_in( node%gid ) * node%interp_p( i )
457 if (lvl-1 .eq. 0)
then
458 call this%gs_h%op(vec_out, this%lvl(lvl)%fine_lvl_dofs, gs_op_add)
459 call col2(vec_out, this%coef%mult, this%lvl(lvl)%fine_lvl_dofs)
460 call this%blst%apply(vec_out, n)
467 real(kind=rp),
intent(inout) :: vec_out(:)
468 real(kind=rp),
intent(inout) :: vec_in(:)
469 type(c_ptr) :: vec_out_d
470 type(c_ptr) :: vec_in_d
471 integer,
intent(in) :: lvl_out
472 integer :: i, n, cdof, lvl
475 n = this%lvl(1)%fine_lvl_dofs
477 call this%ax%compute(vec_out, vec_in, this%coef, this%msh, this%Xh)
478 call this%gs_h%op(vec_out, n, gs_op_add, glb_cmd_event)
479 call device_stream_wait_event(glb_cmd_queue, glb_cmd_event, 0)
480 call this%blst%apply(vec_out, n)
483 associate( wrk_in_d => this%lvl(1)%wrk_in_d, wrk_out_d => this%lvl(1)%wrk_out_d)
485 call device_masked_gather_copy_0(wrk_in_d, vec_in_d, this%lvl(lvl)%map_finest2lvl_d, this%lvl(lvl)%nnodes, n)
487 call this%gs_h%op(this%lvl(1)%wrk_in, n, gs_op_add, glb_cmd_event)
488 call device_stream_wait_event(glb_cmd_queue, glb_cmd_event, 0)
489 call device_col2( wrk_in_d, this%coef%mult_d, n)
490 call this%blst%apply(this%lvl(1)%wrk_in, n)
493 call this%ax%compute(this%lvl(1)%wrk_out, this%lvl(1)%wrk_in, this%coef, this%msh, this%Xh)
494 call this%gs_h%op(this%lvl(1)%wrk_out, n, gs_op_add, glb_cmd_event)
495 call device_stream_wait_event(glb_cmd_queue, glb_cmd_event, 0)
496 call this%blst%apply(this%lvl(1)%wrk_out, n)
498 call device_col2( wrk_out_d, this%coef%mult_d, n)
501 call device_rzero(vec_out_d, this%lvl(lvl)%nnodes)
502 call device_masked_atomic_reduction_0(vec_out_d, wrk_out_d, this%lvl(lvl)%map_finest2lvl_d, this%lvl(lvl)%nnodes, n)
510 type(c_ptr) :: vec_out_d
511 type(c_ptr) :: vec_in_d
512 integer,
intent(in) :: lvl
514 n = this%lvl(lvl)%nnodes
515 m = this%lvl(lvl)%fine_lvl_dofs
516 if (lvl-1 .eq. 0)
then
517 call device_col2(vec_in_d, this%coef%mult_d, m)
519 call device_rzero(vec_out_d, n)
520 call device_masked_atomic_reduction_0(vec_out_d, vec_in_d, this%lvl(lvl)%map_f2c_d, n, m)
525 real(kind=rp),
intent(inout) :: vec_out(:)
526 type(c_ptr) :: vec_out_d
527 type(c_ptr) :: vec_in_d
528 integer,
intent(in) :: lvl
530 n = this%lvl(lvl)%nnodes
531 m = this%lvl(lvl)%fine_lvl_dofs
532 call device_masked_gather_copy_0(vec_out_d, vec_in_d, this%lvl(lvl)%map_f2c_d, n, m)
533 if (lvl-1 .eq. 0)
then
534 call this%gs_h%op(vec_out, m, gs_op_add, glb_cmd_event)
535 call device_stream_wait_event(glb_cmd_queue, glb_cmd_event, 0)
536 call device_col2( vec_out_d, this%coef%mult_d, m)
537 call this%blst%apply( vec_out, n)
Deassociate a Fortran array from a device pointer.
Map a Fortran array to a device (allocate and associate)
Defines a Matrix-vector product.
subroutine, public device_masked_atomic_reduction_0(a_d, b_d, mask_d, n, n_mask, strm)
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_masked_gather_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
Gather a masked vector .
subroutine, public device_cfill(a_d, c, n, strm)
Set all elements to a constant c .
Device abstraction, common interface for various accelerators.
subroutine, public device_free(x_d)
Deallocate memory on the device.
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
type(c_ptr), bind(C), public glb_cmd_event
Event for the global command queue.
subroutine, public col2(a, b, n)
Vector multiplication .
subroutine, public rzero(a, n)
Zero a real vector.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Defines a function space.
Implements the base type for TreeAMG hierarchy structure.
subroutine tamg_device_matvec_flat_impl(this, vec_out, vec_in, vec_out_d, vec_in_d, lvl_out)
subroutine tamg_device_prolongation_operator(this, vec_out_d, vec_in_d, lvl, vec_out)
subroutine lvl_free(this)
deallocate tamg level
recursive subroutine tamg_matvec_impl(this, vec_out, vec_in, lvl, lvl_out)
Matrix vector product using the TreeAMG hierarchy structure b=Ax done as vec_out = A * vec_in This is...
subroutine, public tamg_node_init(node, gid, ndofs)
Initialization of a TreeAMG tree node.
subroutine tamg_restriction_operator(this, vec_out, vec_in, lvl)
Restriction operator for TreeAMG. vec_out = R * vec_in.
subroutine tamg_prolongation_operator(this, vec_out, vec_in, lvl)
Prolongation operator for TreeAMG. vec_out = P * vec_in.
subroutine node_free(this)
deallocate tamg tree node
subroutine, public tamg_lvl_init(tamg_lvl, lvl, nnodes, ndofs)
Initialization of a TreeAMG level.
subroutine tamg_free(this)
deallocate tamg hierarchy
recursive subroutine tamg_matvec(this, vec_out, vec_in, lvl_out)
Wrapper for matrix vector product using the TreeAMG hierarchy structure b=Ax done as vec_out = A * ve...
subroutine tamg_device_restriction_operator(this, vec_out_d, vec_in_d, lvl)
subroutine tamg_init(this, ax, xh, coef, msh, gs_h, nlvls, blst)
Initialization of TreeAMG hierarchy.
recursive subroutine tamg_matvec_flat_impl(this, vec_out, vec_in, lvl_blah, lvl_out)
Ignore this. For piecewise constant, can create index map directly to finest level.
Base type for a matrix-vector product providing .
A list of allocatable `bc_t`. Follows the standard interface of lists.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
The function space for the SEM solution fields.
Type for a TreeAMG hierarchy.
Type for storing TreeAMG level information.
Type for storing TreeAMG tree node information.