51 use mpi_f08,
only: mpi_allreduce, mpi_min, mpi_in_place, mpi_integer
66 use,
intrinsic :: iso_c_binding
71 real(kind=
rp),
allocatable :: r(:)
72 real(kind=
rp),
allocatable :: rc(:)
73 real(kind=
rp),
allocatable :: tmp(:)
74 type(c_ptr) :: r_d = c_null_ptr
75 type(c_ptr) :: rc_d = c_null_ptr
76 type(c_ptr) :: tmp_d = c_null_ptr
105 max_iter, cheby_degree)
107 class(
ax_t),
target,
intent(in) :: ax
108 type(
space_t),
target,
intent(in) :: Xh
109 type(
coef_t),
target,
intent(in) :: coef
110 type(
mesh_t),
target,
intent(in) :: msh
111 type(
gs_t),
target,
intent(in) :: gs_h
112 type(
bc_list_t),
target,
intent(in) :: blst
113 integer,
intent(in) :: nlvls
114 integer,
intent(in) :: max_iter
115 integer,
intent(in) :: cheby_degree
116 integer :: lvl, n, mlvl, target_num_aggs
117 integer,
allocatable :: agg_nhbr(:,:), nhbr_tmp(:,:)
118 character(len=LOG_SIZE) :: log_buf
119 integer :: glb_min_target_aggs
120 logical :: use_greedy_agg
124 write(log_buf,
'(A28,I2,A8)')
'Creating AMG hierarchy with', &
129 call this%amg%init(ax, xh, coef, msh, gs_h, nlvls, blst)
132 use_greedy_agg = .true.
137 allocate( agg_nhbr, source = msh%facet_neigh )
140 if (use_greedy_agg)
then
141 target_num_aggs = this%amg%lvl(mlvl-1)%nnodes / 8
143 target_num_aggs = this%amg%lvl(mlvl-1)%nnodes / 2
146 glb_min_target_aggs = target_num_aggs
147 call mpi_allreduce(mpi_in_place, glb_min_target_aggs, 1, &
149 if (glb_min_target_aggs .lt. 4 )
then
151 "TAMG: Too many levels. Not enough DOFs for coarsest grid.")
152 this%amg%nlvls = mlvl
156 if (use_greedy_agg)
then
161 call aggregate_pairs( this%amg, mlvl, target_num_aggs, agg_nhbr, nhbr_tmp)
165 deallocate( nhbr_tmp )
167 deallocate( agg_nhbr )
172 this%max_iter = max_iter
174 this%nlvls = this%amg%nlvls
175 if (this%nlvls .gt. this%amg%nlvls)
then
177 "Requested number multigrid levels &
178 & is greater than the initialized AMG levels")
182 allocate(this%smoo(0:(this%amg%nlvls)))
183 do lvl = 0, this%amg%nlvls-1
184 n = this%amg%lvl(lvl+1)%fine_lvl_dofs
185 call this%smoo(lvl)%init(n, lvl, cheby_degree)
189 allocate(this%wrk(this%amg%nlvls))
190 do lvl = 1, this%amg%nlvls
191 n = this%amg%lvl(lvl)%fine_lvl_dofs
192 allocate( this%wrk(lvl)%r(n) )
193 allocate( this%wrk(lvl)%rc(n) )
194 allocate( this%wrk(lvl)%tmp(n) )
196 call device_map( this%wrk(lvl)%r, this%wrk(lvl)%r_d, n)
197 call device_map( this%wrk(lvl)%rc, this%wrk(lvl)%rc_d, n)
198 call device_map( this%wrk(lvl)%tmp, this%wrk(lvl)%tmp_d, n)
219 if (
allocated(this%amg))
then
223 if (
allocated(this%smoo))
then
224 do i = 1,
size(this%smoo)
225 call this%smoo(i)%free()
227 deallocate(this%smoo)
229 if (
allocated(this%wrk))
then
230 do i = 1,
size(this%wrk)
236 if (
allocated(this%wrk(i)%r))
deallocate(this%wrk(i)%r)
237 if (
allocated(this%wrk(i)%rc))
deallocate(this%wrk(i)%rc)
238 if (
allocated(this%wrk(i)%tmp))
deallocate(this%wrk(i)%tmp)
249 integer,
intent(in) :: n
251 real(kind=
rp),
dimension(n),
intent(inout) :: z
252 real(kind=
rp),
dimension(n),
intent(inout) :: r
255 integer :: iter, max_iter
256 logical :: zero_initial_guess
258 max_iter = this%max_iter
265 zero_initial_guess = .true.
267 do iter = 1, max_iter
270 zero_initial_guess = .false.
275 zero_initial_guess = .true.
277 do iter = 1, max_iter
280 zero_initial_guess = .false.
295 integer,
intent(in) :: n
296 real(kind=
rp),
intent(inout) :: x(n)
297 real(kind=
rp),
intent(inout) :: b(n)
300 logical,
intent(in) :: zero_initial_guess
301 integer,
intent(in) :: lvl
302 real(kind=
rp) :: r(n)
303 real(kind=
rp) :: rc(n)
304 real(kind=
rp) :: tmp(n)
305 integer :: iter, num_iter
308 max_lvl = mgstuff%nlvls-1
312 call mgstuff%smoo(lvl)%solve(x, b, n, amg, &
314 if (lvl .eq. max_lvl)
then
324 call amg%interp_f2c(rc, r, lvl+1)
329 call tamg_mg_cycle(tmp, rc, amg%lvl(lvl+1)%nnodes, lvl+1, amg, mgstuff, &
334 call amg%interp_c2f(r, tmp, lvl+1)
342 call mgstuff%smoo(lvl)%solve(x,b, n, amg)
354 integer,
intent(in) :: n
355 real(kind=
rp),
intent(inout) :: x(n)
356 real(kind=
rp),
intent(inout) :: b(n)
361 logical,
intent(in) :: zero_initial_guess
362 integer,
intent(in) :: lvl
363 integer :: iter, num_iter
366 max_lvl = mgstuff%nlvls-1
370 call mgstuff%smoo(lvl)%device_solve(x, b, x_d, b_d, n, amg, &
372 if (lvl .eq. max_lvl)
then
375 associate( r => mgstuff%wrk(lvl+1)%r, r_d => mgstuff%wrk(lvl+1)%r_d, &
376 rc => mgstuff%wrk(lvl+1)%rc, rc_d => mgstuff%wrk(lvl+1)%rc_d, &
377 tmp => mgstuff%wrk(lvl+1)%tmp, tmp_d => mgstuff%wrk(lvl+1)%tmp_d )
381 call amg%device_matvec(r, x, r_d, x_d, lvl)
386 call amg%interp_f2c_d(rc_d, r_d, lvl+1)
392 amg%lvl(lvl+1)%nnodes, lvl+1, amg, mgstuff, .true.)
396 call amg%interp_c2f_d(r_d, tmp_d, lvl+1, r)
404 call mgstuff%smoo(lvl)%device_solve(x, b, x_d, b_d, n, amg)
417 integer,
intent(in) :: n
418 real(kind=rp),
intent(inout) :: r(n)
419 real(kind=rp),
intent(inout) :: x(n)
420 real(kind=rp),
intent(inout) :: b(n)
421 type(tamg_hierarchy_t),
intent(inout) :: amg
422 integer,
intent(in) :: lvl
424 call amg%matvec(r, x, lvl)
425 call sub3(r, b, r, n)
430 integer,
intent(in) :: lvl, nagg, agg_type
431 character(len=LOG_SIZE) :: log_buf
433 if (agg_type .eq. 1)
then
434 write(log_buf,
'(A8,I2,A31)')
'-- level', lvl, &
435 '-- Calling Greedy Aggregation'
436 else if (agg_type .eq. 2)
then
437 write(log_buf,
'(A8,I2,A33)')
'-- level', lvl, &
438 '-- Calling Pairwise Aggregation'
440 write(log_buf,
'(A8,I2,A31)')
'-- level', lvl, &
441 '-- UNKNOWN Aggregation'
443 call neko_log%message(log_buf)
444 write(log_buf,
'(A33,I6)')
'Target Aggregates:', nagg
445 call neko_log%message(log_buf)
449 integer,
intent(in) :: lvl, n
450 real(kind=rp),
intent(inout) :: r(n)
451 real(kind=rp),
intent(inout) :: x(n)
452 real(kind=rp),
intent(inout) :: b(n)
456 type(tamg_hierarchy_t),
intent(inout) :: amg
458 character(len=LOG_SIZE) :: log_buf
460 call amg%device_matvec(r, x, r_d, x_d, lvl)
461 call device_sub3(r_d, b_d, r_d, n)
462 val = device_glsc2(r_d, r_d, n)
464 write(log_buf,
'(A33,I6,F12.6)')
'tAMG resid:', lvl, val
465 call neko_log%message(log_buf)
471 type(tamg_hierarchy_t),
intent(inout) :: amg
472 integer :: i, j, k, l, nid, n
473 do j = 1, amg%lvl(1)%nnodes
474 do k = 1, amg%lvl(1)%nodes(j)%ndofs
475 nid = amg%lvl(1)%nodes(j)%dofs(k)
476 amg%lvl(1)%map_finest2lvl(nid) = amg%lvl(1)%nodes(j)%gid
479 n =
size(amg%lvl(1)%map_finest2lvl)
482 nid = amg%lvl(l-1)%map_finest2lvl(i)
483 do j = 1, amg%lvl(l)%nnodes
484 do k = 1, amg%lvl(l)%nodes(j)%ndofs
485 if (nid .eq. amg%lvl(l)%nodes(j)%dofs(k))
then
486 amg%lvl(l)%map_finest2lvl(i) = amg%lvl(l)%nodes(j)%gid
492 if (neko_bcknd_device .eq. 1)
then
494 amg%lvl(l)%map_finest2lvl(0) = n
495 call device_memcpy( amg%lvl(l)%map_finest2lvl, &
496 amg%lvl(l)%map_finest2lvl_d, n, &
497 host_to_device, .true.)
498 call device_memcpy( amg%lvl(l)%map_f2c, &
499 amg%lvl(l)%map_f2c_d, amg%lvl(l)%fine_lvl_dofs+1, &
500 host_to_device, .true.)
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Return the device pointer for an associated Fortran array.
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
Defines a Matrix-vector product.
type(mpi_comm), public neko_comm
MPI communicator.
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction .
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
real(kind=rp) function, public device_glsc2(a_d, b_d, n, strm)
Weighted inner product .
Device abstraction, common interface for various accelerators.
integer, parameter, public host_to_device
subroutine, public device_free(x_d)
Deallocate memory on the device.
type(log_t), public neko_log
Global log stream.
integer, parameter, public log_size
real(kind=rp) function, public glsc2(a, b, n)
Weighted inner product .
subroutine, public sub3(a, b, c, n)
Vector subtraction .
subroutine, public add2(a, b, n)
Vector addition .
subroutine, public col2(a, b, n)
Vector multiplication .
subroutine, public rzero(a, n)
Zero a real vector.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Defines a function space.
Implements an aggregation for TreeAMG hierarchy structure.
subroutine aggregate_end(tamg, lvl_id)
Aggregate all dofs to a single point to form a tree-like structure.
subroutine aggregate_pairs(tamg, lvl_id, max_aggs, facet_neigh, agg_nhbr)
Aggregates pairs of dofs based on adjacent dofs.
subroutine aggregate_greedy(tamg, lvl_id, max_aggs, facet_neigh, agg_nhbr)
Aggregates dofs based on adjacent dofs.
subroutine aggregate_finest_level(tamg, lx, ly, lz, ne)
Aggregaiton on finest level Aggregates all dofs in an element into a single aggregate.
Implements multigrid using the TreeAMG hierarchy structure. USE:
recursive subroutine tamg_mg_cycle(x, b, n, lvl, amg, mgstuff, zero_initial_guess)
Recrsive multigrid cycle for the TreeAMG solver object.
subroutine calc_resid(r, x, b, amg, lvl, n)
Wrapper function to calculate residyal.
subroutine tamg_mg_free(this)
free tree amg solver object
recursive subroutine tamg_mg_cycle_d(x, b, x_d, b_d, n, lvl, amg, mgstuff, zero_initial_guess)
Recrsive multigrid cycle for the TreeAMG solver object on device.
subroutine fill_lvl_map(amg)
Create index mapping between levels and directly to finest level.
subroutine print_resid_info(r, x, b, r_d, x_d, b_d, amg, lvl, n)
subroutine tamg_mg_solve(this, z, r, n)
Solver function for the TreeAMG solver object.
subroutine print_preagg_info(lvl, nagg, agg_type)
subroutine tamg_mg_init(this, ax, xh, coef, msh, gs_h, nlvls, blst, max_iter, cheby_degree)
Initialization of the TreeAMG multigrid solver.
Implements smoothers for use with TreeAMG matrix vector product.
Implements the base type for TreeAMG hierarchy structure.
subroutine, public tamg_node_init(node, gid, ndofs)
Initialization of a TreeAMG tree node.
subroutine, public tamg_lvl_init(tamg_lvl, lvl, nnodes, ndofs)
Initialization of a TreeAMG level.
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Base type for a matrix-vector product providing .
A list of allocatable `bc_t`. Follows the standard interface of lists.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
The function space for the SEM solution fields.
Type for a TreeAMG hierarchy.
Type for the TreeAMG solver.
Type for Chebyshev iteration using TreeAMG matvec.