48  use, 
intrinsic :: iso_c_binding
 
   54     logical :: isleaf = .true. 
 
   58     integer, 
allocatable :: dofs(:)
 
   59     real(kind=
rp) :: xyz(3) 
 
   60     real(kind=
rp), 
allocatable :: interp_r(:) 
 
   61     real(kind=
rp), 
allocatable :: interp_p(:) 
 
 
   69     integer :: fine_lvl_dofs = 0
 
   70     real(kind=
rp), 
allocatable :: wrk_in(:) 
 
   71     type(c_ptr) :: wrk_in_d = c_null_ptr
 
   72     real(kind=
rp), 
allocatable :: wrk_out(:) 
 
   73     type(c_ptr) :: wrk_out_d = c_null_ptr
 
   74     integer, 
allocatable :: map_finest2lvl(:)
 
   75     type(c_ptr) :: map_finest2lvl_d = c_null_ptr
 
   77     integer, 
allocatable :: nodes_ptr(:)
 
   78     type(c_ptr) :: nodes_ptr_d = c_null_ptr
 
   79     integer, 
allocatable :: nodes_gid(:)
 
   80     type(c_ptr) :: nodes_gid_d = c_null_ptr
 
   81     integer, 
allocatable :: nodes_dofs(:)
 
   82     type(c_ptr) :: nodes_dofs_d = c_null_ptr
 
   83     integer, 
allocatable :: map_f2c(:)
 
   84     type(c_ptr) :: map_f2c_d = c_null_ptr
 
 
  131  subroutine tamg_init(this, ax, Xh, coef, msh, gs_h, nlvls, blst)
 
  133    class(
ax_t), 
target, 
intent(in) :: ax
 
  134    type(
space_t),
target, 
intent(in) :: Xh
 
  135    type(
coef_t), 
target, 
intent(in) :: coef
 
  136    type(
mesh_t), 
target, 
intent(in) :: msh
 
  137    type(
gs_t), 
target, 
intent(in) :: gs_h
 
  138    integer, 
intent(in) :: nlvls
 
  139    type(
bc_list_t), 
target, 
intent(in) :: blst
 
  149    if (nlvls .lt. 2) 
then 
  150       call neko_error(
"Need to request at least two multigrid levels.")
 
  154    allocate( this%lvl(this%nlvls) )
 
  157       allocate( this%lvl(i)%map_finest2lvl( 0:coef%dof%size() ))
 
  159          call device_map(this%lvl(i)%map_finest2lvl, this%lvl(i)%map_finest2lvl_d, coef%dof%size()+1)
 
 
  172    integer, 
intent(in) :: lvl
 
  173    integer, 
intent(in) :: nnodes
 
  174    integer, 
intent(in) :: ndofs
 
  177    tamg_lvl%nnodes = nnodes
 
  178    allocate( tamg_lvl%nodes(tamg_lvl%nnodes) )
 
  179    allocate( tamg_lvl%nodes_ptr(tamg_lvl%nnodes+1) )
 
  180    allocate( tamg_lvl%nodes_gid(tamg_lvl%nnodes) )
 
  181    allocate( tamg_lvl%nodes_dofs(ndofs) )
 
  182    allocate( tamg_lvl%map_f2c(0:ndofs) )
 
  184       call device_map(tamg_lvl%map_f2c, tamg_lvl%map_f2c_d, ndofs+1)
 
  187    tamg_lvl%fine_lvl_dofs = ndofs
 
  188    allocate( tamg_lvl%wrk_in( ndofs ) )
 
  189    allocate( tamg_lvl%wrk_out( ndofs ) )
 
  191       call device_map(tamg_lvl%wrk_in, tamg_lvl%wrk_in_d, ndofs)
 
  193       call device_map(tamg_lvl%wrk_out, tamg_lvl%wrk_out_d, ndofs)
 
 
  204    integer, 
intent(in) :: gid
 
  205    integer, 
intent(in) :: ndofs
 
  209    allocate( node%dofs( node%ndofs) )
 
  211    allocate( node%interp_r( node%ndofs) )
 
  212    allocate( node%interp_p( node%ndofs) )
 
  213    node%interp_r = 1.0_rp
 
  214    node%interp_p = 1.0_rp
 
 
  225    real(kind=
rp), 
intent(inout) :: vec_out(:)
 
  226    real(kind=
rp), 
intent(inout) :: vec_in(:)
 
  227    integer, 
intent(in) :: lvl_out
 
 
  242    real(kind=
rp), 
intent(inout) :: vec_out(:)
 
  243    real(kind=
rp), 
intent(inout) :: vec_in(:)
 
  244    integer, 
intent(in) :: lvl
 
  245    integer, 
intent(in) :: lvl_out
 
  252       call this%gs_h%op(vec_in, n, gs_op_add)
 
  253       call col2( vec_in, this%coef%mult, n)
 
  255       call this%ax%compute(vec_out, vec_in, this%coef, this%msh, this%Xh)
 
  256       call this%gs_h%op(vec_out, n, gs_op_add)
 
  257       call this%blst%apply(vec_out, n)
 
  259       if (lvl_out .ne. 0) 
then 
  260          call col2(vec_out, this%coef%mult, n)
 
  264       if (lvl_out .ge. lvl) 
then 
  267          associate( wrk_in => this%lvl(lvl)%wrk_in, wrk_out => this%lvl(lvl)%wrk_out)
 
  268            n = this%lvl(lvl)%fine_lvl_dofs
 
  269            call rzero(wrk_in, n)
 
  270            call rzero(vec_out, this%lvl(lvl)%nnodes)
 
  271            do n = 1, this%lvl(lvl)%nnodes
 
  272               associate(node => this%lvl(lvl)%nodes(n))
 
  274                    wrk_in( node%dofs(i) ) = wrk_in( node%dofs(i) ) + vec_in( node%gid ) * node%interp_p( i )
 
  279            call this%matvec_impl(wrk_out, wrk_in, lvl-1, lvl_out)
 
  282            do n = 1, this%lvl(lvl)%nnodes
 
  283               associate(node => this%lvl(lvl)%nodes(n))
 
  285                    vec_out( node%gid ) = vec_out(node%gid ) + wrk_out( node%dofs(i) ) * node%interp_r( i )
 
  290       else if (lvl_out .lt. lvl) 
then 
  292          call this%matvec_impl(vec_out, vec_in, lvl-1, lvl_out)
 
  294          call neko_error(
"TAMG: matvec level numbering problem.")
 
 
  303    real(kind=rp), 
intent(inout) :: vec_out(:)
 
  304    real(kind=rp), 
intent(inout) :: vec_in(:)
 
  305    integer, 
intent(in) :: lvl_blah
 
  306    integer, 
intent(in) :: lvl_out
 
  307    integer :: i, n, cdof, lvl
 
  310    n = this%lvl(1)%fine_lvl_dofs
 
  312       call this%ax%compute(vec_out, vec_in, this%coef, this%msh, this%Xh)
 
  313       call this%gs_h%op(vec_out, n, gs_op_add)
 
  314       call this%blst%apply(vec_out, n)
 
  316       associate( wrk_in => this%lvl(1)%wrk_in, wrk_out => this%lvl(1)%wrk_out)
 
  319            cdof = this%lvl(lvl)%map_finest2lvl(i)
 
  320            wrk_in(i) = vec_in( cdof )
 
  324         call this%gs_h%op(wrk_in, n, gs_op_add)
 
  325         call col2( wrk_in, this%coef%mult, n)
 
  326         call this%blst%apply(wrk_in, n)
 
  329         call this%ax%compute(wrk_out, wrk_in, this%coef, this%msh, this%Xh)
 
  330         call this%gs_h%op(wrk_out, n, gs_op_add)
 
  331         call this%blst%apply(wrk_out, n)
 
  333         call col2(wrk_out, this%coef%mult, n)
 
  336         call rzero(vec_out, this%lvl(lvl)%nnodes)
 
  338            cdof = this%lvl(lvl)%map_finest2lvl(i)
 
  339            vec_out(cdof) = vec_out(cdof) + wrk_out( i )
 
 
  353    real(kind=rp), 
intent(inout) :: vec_out(:)
 
  354    real(kind=rp), 
intent(inout) :: vec_in(:)
 
  355    integer, 
intent(in) :: lvl
 
  356    integer :: i, n, node_start, node_end, node_id
 
  359    if (lvl-1 .eq. 0) 
then 
  360       call col2(vec_in, this%coef%mult, this%lvl(lvl)%fine_lvl_dofs)
 
  362    do n = 1, this%lvl(lvl)%nnodes
 
  363       associate(node => this%lvl(lvl)%nodes(n))
 
  365            vec_out( node%gid ) = vec_out( node%gid ) + vec_in( node%dofs(i) ) * node%interp_r( i )
 
 
  377    real(kind=rp), 
intent(inout) :: vec_out(:)
 
  378    real(kind=rp), 
intent(inout) :: vec_in(:)
 
  379    integer, 
intent(in) :: lvl
 
  380    integer :: i, n, node_start, node_end, node_id
 
  383    do n = 1, this%lvl(lvl)%nnodes
 
  384       associate(node => this%lvl(lvl)%nodes(n))
 
  386            vec_out( node%dofs(i) ) = vec_out( node%dofs(i) ) + vec_in( node%gid ) * node%interp_p( i )
 
  390    if (lvl-1 .eq. 0) 
then 
  391       call this%gs_h%op(vec_out, this%lvl(lvl)%fine_lvl_dofs, gs_op_add)
 
  392       call col2(vec_out, this%coef%mult, this%lvl(lvl)%fine_lvl_dofs)
 
  393       call this%blst%apply(vec_out, n)
 
 
  400    real(kind=rp), 
intent(inout) :: vec_out(:)
 
  401    real(kind=rp), 
intent(inout) :: vec_in(:)
 
  402    type(c_ptr) :: vec_out_d
 
  403    type(c_ptr) :: vec_in_d
 
  404    integer, 
intent(in) :: lvl_out
 
  405    integer :: i, n, cdof, lvl
 
  408    n = this%lvl(1)%fine_lvl_dofs
 
  410       call this%ax%compute(vec_out, vec_in, this%coef, this%msh, this%Xh)
 
  411       call this%gs_h%op(vec_out, n, gs_op_add, glb_cmd_event)
 
  412       call device_stream_wait_event(glb_cmd_queue, glb_cmd_event, 0)
 
  413       call this%blst%apply(vec_out, n)
 
  416       associate( wrk_in_d => this%lvl(1)%wrk_in_d, wrk_out_d => this%lvl(1)%wrk_out_d)
 
  418         call device_masked_gather_copy_0(wrk_in_d, vec_in_d, this%lvl(lvl)%map_finest2lvl_d, this%lvl(lvl)%nnodes, n)
 
  420         call this%gs_h%op(this%lvl(1)%wrk_in, n, gs_op_add, glb_cmd_event)
 
  421         call device_stream_wait_event(glb_cmd_queue, glb_cmd_event, 0)
 
  422         call device_col2( wrk_in_d, this%coef%mult_d, n)
 
  423         call this%blst%apply(this%lvl(1)%wrk_in, n)
 
  426         call this%ax%compute(this%lvl(1)%wrk_out, this%lvl(1)%wrk_in, this%coef, this%msh, this%Xh)
 
  427         call this%gs_h%op(this%lvl(1)%wrk_out, n, gs_op_add, glb_cmd_event)
 
  428         call device_stream_wait_event(glb_cmd_queue, glb_cmd_event, 0)
 
  429         call this%blst%apply(this%lvl(1)%wrk_out, n)
 
  431         call device_col2( wrk_out_d, this%coef%mult_d, n)
 
  434         call device_rzero(vec_out_d, this%lvl(lvl)%nnodes)
 
  435         call device_masked_atomic_reduction_0(vec_out_d, wrk_out_d, this%lvl(lvl)%map_finest2lvl_d, this%lvl(lvl)%nnodes, n)
 
 
  443    type(c_ptr) :: vec_out_d
 
  444    type(c_ptr) :: vec_in_d
 
  445    integer, 
intent(in) :: lvl
 
  447    n = this%lvl(lvl)%nnodes
 
  448    m = this%lvl(lvl)%fine_lvl_dofs
 
  449    if (lvl-1 .eq. 0) 
then 
  450       call device_col2(vec_in_d, this%coef%mult_d, m)
 
  452    call device_rzero(vec_out_d, n)
 
  453    call device_masked_atomic_reduction_0(vec_out_d, vec_in_d, this%lvl(lvl)%map_f2c_d, n, m)
 
 
  458    real(kind=rp), 
intent(inout) :: vec_out(:)
 
  459    type(c_ptr) :: vec_out_d
 
  460    type(c_ptr) :: vec_in_d
 
  461    integer, 
intent(in) :: lvl
 
  463    n = this%lvl(lvl)%nnodes
 
  464    m = this%lvl(lvl)%fine_lvl_dofs
 
  465    call device_masked_gather_copy_0(vec_out_d, vec_in_d, this%lvl(lvl)%map_f2c_d, n, m)
 
  466    if (lvl-1 .eq. 0) 
then 
  467       call this%gs_h%op(vec_out, m, gs_op_add, glb_cmd_event)
 
  468       call device_stream_wait_event(glb_cmd_queue, glb_cmd_event, 0)
 
  469       call device_col2( vec_out_d, this%coef%mult_d, m)
 
  470       call this%blst%apply( vec_out, n)
 
 
Map a Fortran array to a device (allocate and associate)
 
Defines a Matrix-vector product.
 
subroutine, public device_masked_atomic_reduction_0(a_d, b_d, mask_d, n, n_mask, strm)
 
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
 
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
 
subroutine, public device_masked_gather_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
Gather a masked vector .
 
subroutine, public device_cfill(a_d, c, n, strm)
Set all elements to a constant c .
 
Device abstraction, common interface for various accelerators.
 
subroutine, public device_free(x_d)
Deallocate memory on the device.
 
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
 
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
 
type(c_ptr), bind(C), public glb_cmd_event
Event for the global command queue.
 
subroutine, public col2(a, b, n)
Vector multiplication .
 
subroutine, public rzero(a, n)
Zero a real vector.
 
integer, parameter neko_bcknd_device
 
integer, parameter, public rp
Global precision used in computations.
 
Defines a function space.
 
Implements the base type for TreeAMG hierarchy structure.
 
subroutine tamg_device_matvec_flat_impl(this, vec_out, vec_in, vec_out_d, vec_in_d, lvl_out)
 
subroutine tamg_device_prolongation_operator(this, vec_out_d, vec_in_d, lvl, vec_out)
 
recursive subroutine tamg_matvec_impl(this, vec_out, vec_in, lvl, lvl_out)
Matrix vector product using the TreeAMG hierarchy structure b=Ax done as vec_out = A * vec_in This is...
 
subroutine, public tamg_node_init(node, gid, ndofs)
Initialization of a TreeAMG tree node.
 
subroutine tamg_restriction_operator(this, vec_out, vec_in, lvl)
Restriction operator for TreeAMG. vec_out = R * vec_in.
 
subroutine tamg_prolongation_operator(this, vec_out, vec_in, lvl)
Prolongation operator for TreeAMG. vec_out = P * vec_in.
 
subroutine, public tamg_lvl_init(tamg_lvl, lvl, nnodes, ndofs)
Initialization of a TreeAMG level.
 
recursive subroutine tamg_matvec(this, vec_out, vec_in, lvl_out)
Wrapper for matrix vector product using the TreeAMG hierarchy structure b=Ax done as vec_out = A * ve...
 
subroutine tamg_device_restriction_operator(this, vec_out_d, vec_in_d, lvl)
 
subroutine tamg_init(this, ax, xh, coef, msh, gs_h, nlvls, blst)
Initialization of TreeAMG hierarchy.
 
recursive subroutine tamg_matvec_flat_impl(this, vec_out, vec_in, lvl_blah, lvl_out)
Ignore this. For piecewise constant, can create index map directly to finest level.
 
Base type for a matrix-vector product providing .
 
A list of allocatable `bc_t`. Follows the standard interface of lists.
 
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
 
The function space for the SEM solution fields.
 
Type for a TreeAMG hierarchy.
 
Type for storing TreeAMG level information.
 
Type for storing TreeAMG tree node information.