48 use,
intrinsic :: iso_c_binding
54 logical :: isleaf = .true.
58 integer,
allocatable :: dofs(:)
59 real(kind=
rp) :: xyz(3)
60 real(kind=
rp),
allocatable :: interp_r(:)
61 real(kind=
rp),
allocatable :: interp_p(:)
69 integer :: fine_lvl_dofs = 0
70 real(kind=
rp),
allocatable :: wrk_in(:)
71 type(c_ptr) :: wrk_in_d = c_null_ptr
72 real(kind=
rp),
allocatable :: wrk_out(:)
73 type(c_ptr) :: wrk_out_d = c_null_ptr
74 integer,
allocatable :: map_finest2lvl(:)
75 type(c_ptr) :: map_finest2lvl_d = c_null_ptr
77 integer,
allocatable :: nodes_ptr(:)
78 type(c_ptr) :: nodes_ptr_d = c_null_ptr
79 integer,
allocatable :: nodes_gid(:)
80 type(c_ptr) :: nodes_gid_d = c_null_ptr
81 integer,
allocatable :: nodes_dofs(:)
82 type(c_ptr) :: nodes_dofs_d = c_null_ptr
83 integer,
allocatable :: map_f2c(:)
84 type(c_ptr) :: map_f2c_d = c_null_ptr
131 subroutine tamg_init(this, ax, Xh, coef, msh, gs_h, nlvls, blst)
133 class(
ax_t),
target,
intent(in) :: ax
134 type(
space_t),
target,
intent(in) :: Xh
135 type(
coef_t),
target,
intent(in) :: coef
136 type(
mesh_t),
target,
intent(in) :: msh
137 type(
gs_t),
target,
intent(in) :: gs_h
138 integer,
intent(in) :: nlvls
139 type(
bc_list_t),
target,
intent(in) :: blst
149 if (nlvls .lt. 2)
then
150 call neko_error(
"Need to request at least two multigrid levels.")
154 allocate( this%lvl(this%nlvls) )
157 allocate( this%lvl(i)%map_finest2lvl( 0:coef%dof%size() ))
159 call device_map(this%lvl(i)%map_finest2lvl, this%lvl(i)%map_finest2lvl_d, coef%dof%size()+1)
131 subroutine tamg_init(this, ax, Xh, coef, msh, gs_h, nlvls, blst)
…
172 integer,
intent(in) :: lvl
173 integer,
intent(in) :: nnodes
174 integer,
intent(in) :: ndofs
177 tamg_lvl%nnodes = nnodes
178 allocate( tamg_lvl%nodes(tamg_lvl%nnodes) )
179 allocate( tamg_lvl%nodes_ptr(tamg_lvl%nnodes+1) )
180 allocate( tamg_lvl%nodes_gid(tamg_lvl%nnodes) )
181 allocate( tamg_lvl%nodes_dofs(ndofs) )
182 allocate( tamg_lvl%map_f2c(0:ndofs) )
184 call device_map(tamg_lvl%map_f2c, tamg_lvl%map_f2c_d, ndofs+1)
187 tamg_lvl%fine_lvl_dofs = ndofs
188 allocate( tamg_lvl%wrk_in( ndofs ) )
189 allocate( tamg_lvl%wrk_out( ndofs ) )
191 call device_map(tamg_lvl%wrk_in, tamg_lvl%wrk_in_d, ndofs)
193 call device_map(tamg_lvl%wrk_out, tamg_lvl%wrk_out_d, ndofs)
204 integer,
intent(in) :: gid
205 integer,
intent(in) :: ndofs
209 allocate( node%dofs( node%ndofs) )
211 allocate( node%interp_r( node%ndofs) )
212 allocate( node%interp_p( node%ndofs) )
213 node%interp_r = 1.0_rp
214 node%interp_p = 1.0_rp
225 real(kind=
rp),
intent(inout) :: vec_out(:)
226 real(kind=
rp),
intent(inout) :: vec_in(:)
227 integer,
intent(in) :: lvl_out
229 call this%matvec_impl(vec_out, vec_in, this%nlvls, lvl_out)
242 real(kind=
rp),
intent(inout) :: vec_out(:)
243 real(kind=
rp),
intent(inout) :: vec_in(:)
244 integer,
intent(in) :: lvl
245 integer,
intent(in) :: lvl_out
252 call this%gs_h%op(vec_in, n, gs_op_add)
253 call col2( vec_in, this%coef%mult(1,1,1,1), n)
255 call this%ax%compute(vec_out, vec_in, this%coef, this%msh, this%Xh)
257 call this%gs_h%op(vec_out, n, gs_op_add)
258 call this%blst%apply(vec_out, n)
261 if (lvl_out .ge. lvl)
then
264 associate( wrk_in => this%lvl(lvl)%wrk_in, wrk_out => this%lvl(lvl)%wrk_out)
265 n = this%lvl(lvl)%fine_lvl_dofs
266 call rzero(wrk_in, n)
267 call rzero(wrk_out, n)
268 call rzero(vec_out, this%lvl(lvl)%nnodes)
269 do n = 1, this%lvl(lvl)%nnodes
270 associate(node => this%lvl(lvl)%nodes(n))
272 wrk_in( node%dofs(i) ) = wrk_in( node%dofs(i) ) + vec_in( node%gid ) * node%interp_p( i )
277 call this%matvec_impl(wrk_out, wrk_in, lvl-1, lvl_out)
280 do n = 1, this%lvl(lvl)%nnodes
281 associate(node => this%lvl(lvl)%nodes(n))
283 vec_out( node%gid ) = vec_out(node%gid ) + wrk_out( node%dofs(i) ) * node%interp_r( i )
288 else if (lvl_out .lt. lvl)
then
290 call this%matvec_impl(vec_out, vec_in, lvl-1, lvl_out)
292 call neko_error(
"TAMG: matvec level numbering problem.")
301 real(kind=rp),
intent(inout) :: vec_out(:)
302 real(kind=rp),
intent(inout) :: vec_in(:)
303 integer,
intent(in) :: lvl_blah
304 integer,
intent(in) :: lvl_out
305 integer :: i, n, cdof, lvl
312 call this%gs_h%op(vec_in, n, gs_op_add)
313 call col2( vec_in, this%coef%mult(1,1,1,1), n)
315 call this%ax%compute(vec_out, vec_in, this%coef, this%msh, this%Xh)
317 call this%gs_h%op(vec_out, n, gs_op_add)
318 call this%blst%apply(vec_out, n)
322 associate( wrk_in => this%lvl(1)%wrk_in, wrk_out => this%lvl(1)%wrk_out)
324 call rzero(wrk_out, n)
325 call rzero(vec_out, this%lvl(lvl)%nnodes)
329 cdof = this%lvl(lvl)%map_finest2lvl(i)
330 wrk_in(i) = vec_in( cdof )
334 call this%gs_h%op(wrk_in, n, gs_op_add)
335 call col2( wrk_in, this%coef%mult(1,1,1,1), n)
337 call this%ax%compute(wrk_out, wrk_in, this%coef, this%msh, this%Xh)
339 call this%gs_h%op(wrk_out, n, gs_op_add)
340 call this%blst%apply(wrk_out, n)
345 cdof = this%lvl(lvl)%map_finest2lvl(i)
346 vec_out(cdof) = vec_out(cdof) + wrk_out( i )
361 real(kind=rp),
intent(inout) :: vec_out(:)
362 real(kind=rp),
intent(inout) :: vec_in(:)
363 integer,
intent(in) :: lvl
364 integer :: i, n, node_start, node_end, node_id
367 do n = 1, this%lvl(lvl)%nnodes
368 associate(node => this%lvl(lvl)%nodes(n))
370 vec_out( node%gid ) = vec_out( node%gid ) + vec_in( node%dofs(i) ) * node%interp_r( i )
391 real(kind=rp),
intent(inout) :: vec_out(:)
392 real(kind=rp),
intent(inout) :: vec_in(:)
393 integer,
intent(in) :: lvl
394 integer :: i, n, node_start, node_end, node_id
397 do n = 1, this%lvl(lvl)%nnodes
398 associate(node => this%lvl(lvl)%nodes(n))
400 vec_out( node%dofs(i) ) = vec_out( node%dofs(i) ) + vec_in( node%gid ) * node%interp_p( i )
418 real(kind=rp),
intent(inout) :: vec_out(:)
419 real(kind=rp),
intent(inout) :: vec_in(:)
420 type(c_ptr) :: vec_out_d
421 type(c_ptr) :: vec_in_d
422 integer,
intent(in) :: lvl_out
423 integer :: i, n, cdof, lvl
426 n = this%lvl(1)%fine_lvl_dofs
428 call this%ax%compute(vec_out, vec_in, this%coef, this%msh, this%Xh)
429 call this%gs_h%op(vec_out, n, gs_op_add, glb_cmd_event)
430 call device_stream_wait_event(glb_cmd_queue, glb_cmd_event, 0)
431 call this%blst%apply(vec_out, n)
434 associate( wrk_in_d => this%lvl(1)%wrk_in_d, wrk_out_d => this%lvl(1)%wrk_out_d)
436 call device_masked_red_copy(wrk_in_d, vec_in_d, this%lvl(lvl)%map_finest2lvl_d, this%lvl(lvl)%nnodes, n)
438 call this%gs_h%op(this%lvl(1)%wrk_in, n, gs_op_add)
439 call device_stream_wait_event(glb_cmd_queue, glb_cmd_event, 0)
440 call device_col2( wrk_in_d, this%coef%mult_d, n)
442 call this%ax%compute(this%lvl(1)%wrk_out, this%lvl(1)%wrk_in, this%coef, this%msh, this%Xh)
443 call this%gs_h%op(this%lvl(1)%wrk_out, n, gs_op_add)
444 call device_stream_wait_event(glb_cmd_queue, glb_cmd_event, 0)
445 call this%blst%apply(this%lvl(1)%wrk_out, n)
447 call device_rzero(vec_out_d, this%lvl(lvl)%nnodes)
448 call device_masked_atomic_reduction(vec_out_d, wrk_out_d, this%lvl(lvl)%map_finest2lvl_d, this%lvl(lvl)%nnodes, n)
457 type(c_ptr) :: vec_out_d
458 type(c_ptr) :: vec_in_d
459 integer,
intent(in) :: lvl
461 n = this%lvl(lvl)%nnodes
462 m = this%lvl(lvl)%fine_lvl_dofs
463 call device_rzero(vec_out_d, n)
464 call device_masked_atomic_reduction(vec_out_d, vec_in_d, this%lvl(lvl)%map_f2c_d, n, m)
469 type(c_ptr) :: vec_out_d
470 type(c_ptr) :: vec_in_d
471 integer,
intent(in) :: lvl
473 n = this%lvl(lvl)%nnodes
474 m = this%lvl(lvl)%fine_lvl_dofs
475 call device_masked_red_copy(vec_out_d, vec_in_d, this%lvl(lvl)%map_f2c_d, n, m)
Map a Fortran array to a device (allocate and associate)
Defines a Matrix-vector product.
subroutine, public device_col2(a_d, b_d, n)
Vector multiplication .
subroutine, public device_rzero(a_d, n)
Zero a real vector.
subroutine, public device_masked_red_copy(a_d, b_d, mask_d, n, m)
subroutine, public device_masked_atomic_reduction(a_d, b_d, mask_d, n, m)
subroutine, public device_cfill(a_d, c, n)
Set all elements to a constant c .
Device abstraction, common interface for various accelerators.
subroutine, public device_free(x_d)
Deallocate memory on the device.
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
type(c_ptr), bind(C), public glb_cmd_event
Event for the global command queue.
subroutine, public col2(a, b, n)
Vector multiplication .
subroutine, public rzero(a, n)
Zero a real vector.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Defines a function space.
Implements the base type for TreeAMG hierarchy structure.
subroutine tamg_device_matvec_flat_impl(this, vec_out, vec_in, vec_out_d, vec_in_d, lvl_out)
recursive subroutine tamg_matvec_impl(this, vec_out, vec_in, lvl, lvl_out)
Matrix vector product using the TreeAMG hierarchy structure b=Ax done as vec_out = A * vec_in This is...
subroutine, public tamg_node_init(node, gid, ndofs)
Initialization of a TreeAMG tree node.
subroutine tamg_restriction_operator(this, vec_out, vec_in, lvl)
Restriction operator for TreeAMG. vec_out = R * vec_in.
subroutine tamg_prolongation_operator(this, vec_out, vec_in, lvl)
Prolongation operator for TreeAMG. vec_out = P * vec_in.
subroutine, public tamg_lvl_init(tamg_lvl, lvl, nnodes, ndofs)
Initialization of a TreeAMG level.
subroutine tamg_device_prolongation_operator(this, vec_out_d, vec_in_d, lvl)
recursive subroutine tamg_matvec(this, vec_out, vec_in, lvl_out)
Wrapper for matrix vector product using the TreeAMG hierarchy structure b=Ax done as vec_out = A * ve...
subroutine tamg_device_restriction_operator(this, vec_out_d, vec_in_d, lvl)
subroutine tamg_init(this, ax, xh, coef, msh, gs_h, nlvls, blst)
Initialization of TreeAMG hierarchy.
recursive subroutine tamg_matvec_flat_impl(this, vec_out, vec_in, lvl_blah, lvl_out)
Ignore this. For piecewise constant, can create index map directly to finest level.
Base type for a matrix-vector product providing .
A list of allocatable `bc_t`. Follows the standard interface of lists.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
The function space for the SEM solution fields.
Type for a TreeAMG hierarchy.
Type for storing TreeAMG level information.
Type for storing TreeAMG tree node information.