48 use,
intrinsic :: iso_c_binding
54 logical :: isleaf = .true.
58 integer,
allocatable :: dofs(:)
59 real(kind=
rp) :: xyz(3)
60 real(kind=
rp),
allocatable :: interp_r(:)
61 real(kind=
rp),
allocatable :: interp_p(:)
69 integer :: fine_lvl_dofs = 0
70 real(kind=
rp),
allocatable :: wrk_in(:)
71 type(c_ptr) :: wrk_in_d = c_null_ptr
72 real(kind=
rp),
allocatable :: wrk_out(:)
73 type(c_ptr) :: wrk_out_d = c_null_ptr
74 integer,
allocatable :: map_finest2lvl(:)
75 type(c_ptr) :: map_finest2lvl_d = c_null_ptr
77 integer,
allocatable :: nodes_ptr(:)
78 type(c_ptr) :: nodes_ptr_d = c_null_ptr
79 integer,
allocatable :: nodes_gid(:)
80 type(c_ptr) :: nodes_gid_d = c_null_ptr
81 integer,
allocatable :: nodes_dofs(:)
82 type(c_ptr) :: nodes_dofs_d = c_null_ptr
83 integer,
allocatable :: map_f2c(:)
84 type(c_ptr) :: map_f2c_d = c_null_ptr
131 subroutine tamg_init(this, ax, Xh, coef, msh, gs_h, nlvls, blst)
133 class(
ax_t),
target,
intent(in) :: ax
134 type(
space_t),
target,
intent(in) :: Xh
135 type(
coef_t),
target,
intent(in) :: coef
136 type(
mesh_t),
target,
intent(in) :: msh
137 type(
gs_t),
target,
intent(in) :: gs_h
138 integer,
intent(in) :: nlvls
139 type(
bc_list_t),
target,
intent(in) :: blst
149 if (nlvls .lt. 2)
then
150 call neko_error(
"Need to request at least two multigrid levels.")
154 allocate( this%lvl(this%nlvls) )
157 allocate( this%lvl(i)%map_finest2lvl( 0:coef%dof%size() ))
159 call device_map(this%lvl(i)%map_finest2lvl, this%lvl(i)%map_finest2lvl_d, coef%dof%size()+1)
172 integer,
intent(in) :: lvl
173 integer,
intent(in) :: nnodes
174 integer,
intent(in) :: ndofs
177 tamg_lvl%nnodes = nnodes
178 allocate( tamg_lvl%nodes(tamg_lvl%nnodes) )
179 allocate( tamg_lvl%nodes_ptr(tamg_lvl%nnodes+1) )
180 allocate( tamg_lvl%nodes_gid(tamg_lvl%nnodes) )
181 allocate( tamg_lvl%nodes_dofs(ndofs) )
182 allocate( tamg_lvl%map_f2c(0:ndofs) )
184 call device_map(tamg_lvl%map_f2c, tamg_lvl%map_f2c_d, ndofs+1)
187 tamg_lvl%fine_lvl_dofs = ndofs
188 allocate( tamg_lvl%wrk_in( ndofs ) )
189 allocate( tamg_lvl%wrk_out( ndofs ) )
191 call device_map(tamg_lvl%wrk_in, tamg_lvl%wrk_in_d, ndofs)
193 call device_map(tamg_lvl%wrk_out, tamg_lvl%wrk_out_d, ndofs)
204 integer,
intent(in) :: gid
205 integer,
intent(in) :: ndofs
209 allocate( node%dofs( node%ndofs) )
211 allocate( node%interp_r( node%ndofs) )
212 allocate( node%interp_p( node%ndofs) )
213 node%interp_r = 1.0_rp
214 node%interp_p = 1.0_rp
225 real(kind=
rp),
intent(inout) :: vec_out(:)
226 real(kind=
rp),
intent(inout) :: vec_in(:)
227 integer,
intent(in) :: lvl_out
229 call this%matvec_impl(vec_out, vec_in, this%nlvls, lvl_out)
242 real(kind=
rp),
intent(inout) :: vec_out(:)
243 real(kind=
rp),
intent(inout) :: vec_in(:)
244 integer,
intent(in) :: lvl
245 integer,
intent(in) :: lvl_out
252 call this%gs_h%op(vec_in, n, gs_op_add)
253 call col2( vec_in, this%coef%mult, n)
255 call this%ax%compute(vec_out, vec_in, this%coef, this%msh, this%Xh)
256 call this%gs_h%op(vec_out, n, gs_op_add)
257 call this%blst%apply(vec_out, n)
259 if (lvl_out .ne. 0)
then
260 call col2(vec_out, this%coef%mult, n)
264 if (lvl_out .ge. lvl)
then
267 associate( wrk_in => this%lvl(lvl)%wrk_in, wrk_out => this%lvl(lvl)%wrk_out)
268 n = this%lvl(lvl)%fine_lvl_dofs
269 call rzero(wrk_in, n)
270 call rzero(vec_out, this%lvl(lvl)%nnodes)
271 do n = 1, this%lvl(lvl)%nnodes
272 associate(node => this%lvl(lvl)%nodes(n))
274 wrk_in( node%dofs(i) ) = wrk_in( node%dofs(i) ) + vec_in( node%gid ) * node%interp_p( i )
279 call this%matvec_impl(wrk_out, wrk_in, lvl-1, lvl_out)
282 do n = 1, this%lvl(lvl)%nnodes
283 associate(node => this%lvl(lvl)%nodes(n))
285 vec_out( node%gid ) = vec_out(node%gid ) + wrk_out( node%dofs(i) ) * node%interp_r( i )
290 else if (lvl_out .lt. lvl)
then
292 call this%matvec_impl(vec_out, vec_in, lvl-1, lvl_out)
294 call neko_error(
"TAMG: matvec level numbering problem.")
303 real(kind=rp),
intent(inout) :: vec_out(:)
304 real(kind=rp),
intent(inout) :: vec_in(:)
305 integer,
intent(in) :: lvl_blah
306 integer,
intent(in) :: lvl_out
307 integer :: i, n, cdof, lvl
310 n = this%lvl(1)%fine_lvl_dofs
312 call this%ax%compute(vec_out, vec_in, this%coef, this%msh, this%Xh)
313 call this%gs_h%op(vec_out, n, gs_op_add)
314 call this%blst%apply(vec_out, n)
316 associate( wrk_in => this%lvl(1)%wrk_in, wrk_out => this%lvl(1)%wrk_out)
319 cdof = this%lvl(lvl)%map_finest2lvl(i)
320 wrk_in(i) = vec_in( cdof )
324 call this%gs_h%op(wrk_in, n, gs_op_add)
325 call col2( wrk_in, this%coef%mult, n)
328 call this%ax%compute(wrk_out, wrk_in, this%coef, this%msh, this%Xh)
329 call this%gs_h%op(wrk_out, n, gs_op_add)
330 call this%blst%apply(wrk_out, n)
332 call col2(wrk_out, this%coef%mult, n)
335 call rzero(vec_out, this%lvl(lvl)%nnodes)
337 cdof = this%lvl(lvl)%map_finest2lvl(i)
338 vec_out(cdof) = vec_out(cdof) + wrk_out( i )
352 real(kind=rp),
intent(inout) :: vec_out(:)
353 real(kind=rp),
intent(inout) :: vec_in(:)
354 integer,
intent(in) :: lvl
355 integer :: i, n, node_start, node_end, node_id
358 if (lvl-1 .eq. 0)
then
359 call col2(vec_in, this%coef%mult, this%lvl(lvl)%fine_lvl_dofs)
361 do n = 1, this%lvl(lvl)%nnodes
362 associate(node => this%lvl(lvl)%nodes(n))
364 vec_out( node%gid ) = vec_out( node%gid ) + vec_in( node%dofs(i) ) * node%interp_r( i )
376 real(kind=rp),
intent(inout) :: vec_out(:)
377 real(kind=rp),
intent(inout) :: vec_in(:)
378 integer,
intent(in) :: lvl
379 integer :: i, n, node_start, node_end, node_id
382 do n = 1, this%lvl(lvl)%nnodes
383 associate(node => this%lvl(lvl)%nodes(n))
385 vec_out( node%dofs(i) ) = vec_out( node%dofs(i) ) + vec_in( node%gid ) * node%interp_p( i )
389 if (lvl-1 .eq. 0)
then
390 call this%gs_h%op(vec_out, this%lvl(lvl)%fine_lvl_dofs, gs_op_add)
391 call col2(vec_out, this%coef%mult, this%lvl(lvl)%fine_lvl_dofs)
398 real(kind=rp),
intent(inout) :: vec_out(:)
399 real(kind=rp),
intent(inout) :: vec_in(:)
400 type(c_ptr) :: vec_out_d
401 type(c_ptr) :: vec_in_d
402 integer,
intent(in) :: lvl_out
403 integer :: i, n, cdof, lvl
406 n = this%lvl(1)%fine_lvl_dofs
408 call this%ax%compute(vec_out, vec_in, this%coef, this%msh, this%Xh)
409 call this%gs_h%op(vec_out, n, gs_op_add, glb_cmd_event)
410 call device_stream_wait_event(glb_cmd_queue, glb_cmd_event, 0)
411 call this%blst%apply(vec_out, n)
414 associate( wrk_in_d => this%lvl(1)%wrk_in_d, wrk_out_d => this%lvl(1)%wrk_out_d)
416 call device_masked_gather_copy_0(wrk_in_d, vec_in_d, this%lvl(lvl)%map_finest2lvl_d, this%lvl(lvl)%nnodes, n)
418 call this%gs_h%op(this%lvl(1)%wrk_in, n, gs_op_add, glb_cmd_event)
419 call device_stream_wait_event(glb_cmd_queue, glb_cmd_event, 0)
420 call device_col2( wrk_in_d, this%coef%mult_d, n)
423 call this%ax%compute(this%lvl(1)%wrk_out, this%lvl(1)%wrk_in, this%coef, this%msh, this%Xh)
424 call this%gs_h%op(this%lvl(1)%wrk_out, n, gs_op_add, glb_cmd_event)
425 call device_stream_wait_event(glb_cmd_queue, glb_cmd_event, 0)
426 call this%blst%apply(this%lvl(1)%wrk_out, n)
428 call device_col2( wrk_out_d, this%coef%mult_d, n)
431 call device_rzero(vec_out_d, this%lvl(lvl)%nnodes)
432 call device_masked_atomic_reduction_0(vec_out_d, wrk_out_d, this%lvl(lvl)%map_finest2lvl_d, this%lvl(lvl)%nnodes, n)
440 type(c_ptr) :: vec_out_d
441 type(c_ptr) :: vec_in_d
442 integer,
intent(in) :: lvl
444 n = this%lvl(lvl)%nnodes
445 m = this%lvl(lvl)%fine_lvl_dofs
446 if (lvl-1 .eq. 0)
then
447 call device_col2(vec_in_d, this%coef%mult_d, m)
449 call device_rzero(vec_out_d, n)
450 call device_masked_atomic_reduction_0(vec_out_d, vec_in_d, this%lvl(lvl)%map_f2c_d, n, m)
455 real(kind=rp),
intent(inout) :: vec_out(:)
456 type(c_ptr) :: vec_out_d
457 type(c_ptr) :: vec_in_d
458 integer,
intent(in) :: lvl
460 n = this%lvl(lvl)%nnodes
461 m = this%lvl(lvl)%fine_lvl_dofs
462 call device_masked_gather_copy_0(vec_out_d, vec_in_d, this%lvl(lvl)%map_f2c_d, n, m)
463 if (lvl-1 .eq. 0)
then
464 call this%gs_h%op(vec_out, m, gs_op_add, glb_cmd_event)
465 call device_stream_wait_event(glb_cmd_queue, glb_cmd_event, 0)
466 call device_col2( vec_out_d, this%coef%mult_d, m)
Map a Fortran array to a device (allocate and associate)
Defines a Matrix-vector product.
subroutine, public device_masked_atomic_reduction_0(a_d, b_d, mask_d, n, n_mask, strm)
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_masked_gather_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
Gather a masked vector .
subroutine, public device_cfill(a_d, c, n, strm)
Set all elements to a constant c .
Device abstraction, common interface for various accelerators.
subroutine, public device_free(x_d)
Deallocate memory on the device.
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
type(c_ptr), bind(C), public glb_cmd_event
Event for the global command queue.
subroutine, public col2(a, b, n)
Vector multiplication .
subroutine, public rzero(a, n)
Zero a real vector.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Defines a function space.
Implements the base type for TreeAMG hierarchy structure.
subroutine tamg_device_matvec_flat_impl(this, vec_out, vec_in, vec_out_d, vec_in_d, lvl_out)
subroutine tamg_device_prolongation_operator(this, vec_out_d, vec_in_d, lvl, vec_out)
recursive subroutine tamg_matvec_impl(this, vec_out, vec_in, lvl, lvl_out)
Matrix vector product using the TreeAMG hierarchy structure b=Ax done as vec_out = A * vec_in This is...
subroutine, public tamg_node_init(node, gid, ndofs)
Initialization of a TreeAMG tree node.
subroutine tamg_restriction_operator(this, vec_out, vec_in, lvl)
Restriction operator for TreeAMG. vec_out = R * vec_in.
subroutine tamg_prolongation_operator(this, vec_out, vec_in, lvl)
Prolongation operator for TreeAMG. vec_out = P * vec_in.
subroutine, public tamg_lvl_init(tamg_lvl, lvl, nnodes, ndofs)
Initialization of a TreeAMG level.
recursive subroutine tamg_matvec(this, vec_out, vec_in, lvl_out)
Wrapper for matrix vector product using the TreeAMG hierarchy structure b=Ax done as vec_out = A * ve...
subroutine tamg_device_restriction_operator(this, vec_out_d, vec_in_d, lvl)
subroutine tamg_init(this, ax, xh, coef, msh, gs_h, nlvls, blst)
Initialization of TreeAMG hierarchy.
recursive subroutine tamg_matvec_flat_impl(this, vec_out, vec_in, lvl_blah, lvl_out)
Ignore this. For piecewise constant, can create index map directly to finest level.
Base type for a matrix-vector product providing .
A list of allocatable `bc_t`. Follows the standard interface of lists.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
The function space for the SEM solution fields.
Type for a TreeAMG hierarchy.
Type for storing TreeAMG level information.
Type for storing TreeAMG tree node information.