51 use mpi_f08,
only: mpi_allreduce, mpi_min, mpi_in_place, mpi_integer
67 use,
intrinsic :: iso_c_binding
73 real(kind=
rp),
allocatable :: r(:)
74 real(kind=
rp),
allocatable :: b(:)
75 real(kind=
rp),
allocatable :: x(:)
76 type(c_ptr) :: r_d = c_null_ptr
77 type(c_ptr) :: b_d = c_null_ptr
78 type(c_ptr) :: x_d = c_null_ptr
109 max_iter, cheby_degree)
111 class(
ax_t),
target,
intent(in) :: ax
112 type(
space_t),
target,
intent(in) :: Xh
113 type(
coef_t),
target,
intent(in) :: coef
114 type(
mesh_t),
target,
intent(in) :: msh
115 type(
gs_t),
target,
intent(in) :: gs_h
116 type(
bc_list_t),
target,
intent(in) :: blst
117 integer,
intent(in) :: nlvls
118 integer,
intent(in) :: max_iter
119 integer,
intent(in) :: cheby_degree
120 integer :: lvl, n, mlvl, target_num_aggs
121 integer,
allocatable :: agg_nhbr(:,:), nhbr_tmp(:,:)
122 character(len=LOG_SIZE) :: log_buf
123 integer :: glb_min_target_aggs
124 logical :: use_greedy_agg
128 write(log_buf,
'(A28,I2,A8)')
'Creating AMG hierarchy with', &
133 call this%amg%init(ax, xh, coef, msh, gs_h, nlvls, blst)
136 use_greedy_agg = .true.
141 allocate( agg_nhbr, source = msh%facet_neigh )
144 if (use_greedy_agg)
then
145 target_num_aggs = this%amg%lvl(mlvl-1)%nnodes / 8
147 target_num_aggs = this%amg%lvl(mlvl-1)%nnodes / 2
150 glb_min_target_aggs = target_num_aggs
151 call mpi_allreduce(mpi_in_place, glb_min_target_aggs, 1, &
153 if (glb_min_target_aggs .lt. 4 )
then
155 "TAMG: Too many levels. Not enough DOFs for coarsest grid.")
156 this%amg%nlvls = mlvl
160 if (use_greedy_agg)
then
165 call aggregate_pairs( this%amg, mlvl, target_num_aggs, agg_nhbr, nhbr_tmp)
169 deallocate( nhbr_tmp )
171 deallocate( agg_nhbr )
176 this%max_iter = max_iter
178 this%nlvls = this%amg%nlvls
179 if (this%nlvls .gt. this%amg%nlvls)
then
181 "Requested number multigrid levels &
182 & is greater than the initialized AMG levels")
186 allocate(this%smoo(0:(this%amg%nlvls)))
187 do lvl = 0, this%amg%nlvls-1
188 n = this%amg%lvl(lvl+1)%fine_lvl_dofs
189 call this%smoo(lvl)%init(n, lvl, cheby_degree)
193 allocate(this%wrk(0:(this%amg%nlvls)))
194 do lvl = 0, this%amg%nlvls-1
195 n = this%amg%lvl(lvl+1)%fine_lvl_dofs
197 allocate( this%wrk(lvl)%r(n) )
198 allocate( this%wrk(lvl)%b(n) )
199 allocate( this%wrk(lvl)%x(n) )
201 call device_map( this%wrk(lvl)%r, this%wrk(lvl)%r_d, n)
202 call device_map( this%wrk(lvl)%b, this%wrk(lvl)%b_d, n)
203 call device_map( this%wrk(lvl)%x, this%wrk(lvl)%x_d, n)
224 if (
allocated(this%amg))
then
228 if (
allocated(this%smoo))
then
229 do i = 0, (
size(this%smoo)-1)
230 call this%smoo(i)%free()
232 deallocate(this%smoo)
234 if (
allocated(this%wrk))
then
235 do i = 0, (
size(this%wrk)-1)
241 if (
allocated(this%wrk(i)%r))
deallocate(this%wrk(i)%r)
242 if (
allocated(this%wrk(i)%b))
deallocate(this%wrk(i)%b)
243 if (
allocated(this%wrk(i)%x))
deallocate(this%wrk(i)%x)
254 integer,
intent(in) :: n
256 real(kind=
rp),
dimension(n),
intent(inout) :: z
257 real(kind=
rp),
dimension(n),
intent(inout) :: r
260 integer :: iter, max_iter
261 logical :: zero_initial_guess
263 max_iter = this%max_iter
271 zero_initial_guess = .true.
273 do iter = 1, max_iter
274 call this%mg_cycle_d(zero_initial_guess)
275 zero_initial_guess = .false.
280 call rzero(this%wrk(0)%x, n)
281 call copy(this%wrk(0)%b, r, n)
282 zero_initial_guess = .true.
284 do iter = 1, max_iter
285 call this%mg_cycle(zero_initial_guess)
286 zero_initial_guess = .false.
288 call copy(z, this%wrk(0)%x, n)
298 logical,
intent(inout) :: zero_initial_guess
299 character(len=2) :: lvl_name
300 integer :: max_lvl, lvl
302 max_lvl = this%nlvls-1
304 do lvl = 0, max_lvl-1
305 write(lvl_name,
'(I0)') lvl
307 associate(x => this%wrk(lvl)%x, b => this%wrk(lvl)%b, &
308 r => this%wrk(lvl)%r, n => this%wrk(lvl)%n)
312 call this%smoo(lvl)%solve(x, b, n, this%amg, &
321 call this%amg%interp_f2c(this%wrk(lvl+1)%b, r, lvl+1)
323 call rzero(this%wrk(lvl+1)%x, this%wrk(lvl+1)%n)
324 zero_initial_guess = .true.
328 write(lvl_name,
'(I0)') max_lvl
333 call this%smoo(max_lvl)%solve(this%wrk(max_lvl)%x, &
334 this%wrk(max_lvl)%b, this%amg%lvl(max_lvl)%nnodes, this%amg, &
338 zero_initial_guess = .false.
340 do lvl = max_lvl-1, 0, -1
341 write(lvl_name,
'(I0)') lvl
343 associate(x => this%wrk(lvl)%x, b => this%wrk(lvl)%b, &
344 r => this%wrk(lvl)%r, n => this%wrk(lvl)%n)
348 call this%amg%interp_c2f(r, this%wrk(lvl+1)%x, lvl+1)
356 call this%smoo(lvl)%solve(x, b, n, this%amg)
358 call profiler_end_region(
"AMG_level_" // trim(lvl_name))
367 logical,
intent(inout) :: zero_initial_guess
368 character(len=2) :: lvl_name
369 integer :: max_lvl, lvl
371 max_lvl = this%nlvls-1
373 do lvl = 0, max_lvl-1
374 write(lvl_name,
'(I0)') lvl
375 call profiler_start_region(
"AMG_level_" // trim(lvl_name))
376 associate(x => this%wrk(lvl)%x, x_d => this%wrk(lvl)%x_d, &
377 b => this%wrk(lvl)%b, b_d => this%wrk(lvl)%b_d, &
378 r => this%wrk(lvl)%r, r_d => this%wrk(lvl)%r_d, &
379 n => this%wrk(lvl)%n)
383 call this%smoo(lvl)%device_solve(x, b, x_d, b_d, n, this%amg, &
388 call this%amg%device_matvec(r, x, r_d, x_d, lvl)
389 call device_sub3(r_d, b_d, r_d, n)
393 call this%amg%interp_f2c_d(this%wrk(lvl+1)%b_d, r_d, lvl+1)
395 call device_rzero(this%wrk(lvl+1)%x_d, this%wrk(lvl+1)%n)
396 zero_initial_guess = .true.
398 call profiler_end_region(
"AMG_level_" // trim(lvl_name))
400 write(lvl_name,
'(I0)') max_lvl
401 call profiler_start_region(
"AMG_level_" // trim(lvl_name))
405 call this%smoo(max_lvl)%device_solve( &
406 this%wrk(max_lvl)%x, this%wrk(max_lvl)%b, &
407 this%wrk(max_lvl)%x_d, this%wrk(max_lvl)%b_d, &
408 this%amg%lvl(max_lvl)%nnodes, this%amg, &
410 call profiler_end_region(
"AMG_level_" // trim(lvl_name))
412 zero_initial_guess = .false.
414 do lvl = max_lvl-1, 0, -1
415 write(lvl_name,
'(I0)') lvl
416 call profiler_start_region(
"AMG_level_" // trim(lvl_name))
417 associate(x => this%wrk(lvl)%x, x_d => this%wrk(lvl)%x_d, &
418 b => this%wrk(lvl)%b, b_d => this%wrk(lvl)%b_d, &
419 r => this%wrk(lvl)%r, r_d => this%wrk(lvl)%r_d, &
420 n => this%wrk(lvl)%n)
424 call this%amg%interp_c2f_d(r_d, this%wrk(lvl+1)%x_d, lvl+1, r)
428 call device_add2(x_d, r_d, n)
432 call this%smoo(lvl)%device_solve(x, b, x_d, b_d, n, this%amg)
434 call profiler_end_region(
"AMG_level_" // trim(lvl_name))
447 integer,
intent(in) :: n
448 real(kind=rp),
intent(inout) :: r(n)
449 real(kind=rp),
intent(inout) :: x(n)
450 real(kind=rp),
intent(inout) :: b(n)
451 type(tamg_hierarchy_t),
intent(inout) :: amg
452 integer,
intent(in) :: lvl
454 call amg%matvec(r, x, lvl)
455 call sub3(r, b, r, n)
460 integer,
intent(in) :: lvl, nagg, agg_type
461 character(len=LOG_SIZE) :: log_buf
463 if (agg_type .eq. 1)
then
464 write(log_buf,
'(A8,I2,A31)')
'-- level', lvl, &
465 '-- Calling Greedy Aggregation'
466 else if (agg_type .eq. 2)
then
467 write(log_buf,
'(A8,I2,A33)')
'-- level', lvl, &
468 '-- Calling Pairwise Aggregation'
470 write(log_buf,
'(A8,I2,A31)')
'-- level', lvl, &
471 '-- UNKNOWN Aggregation'
473 call neko_log%message(log_buf)
474 write(log_buf,
'(A33,I6)')
'Target Aggregates:', nagg
475 call neko_log%message(log_buf)
479 integer,
intent(in) :: lvl, n
480 real(kind=rp),
intent(inout) :: r(n)
481 real(kind=rp),
intent(inout) :: x(n)
482 real(kind=rp),
intent(inout) :: b(n)
486 type(tamg_hierarchy_t),
intent(inout) :: amg
488 character(len=LOG_SIZE) :: log_buf
490 call amg%device_matvec(r, x, r_d, x_d, lvl)
491 call device_sub3(r_d, b_d, r_d, n)
492 val = device_glsc2(r_d, r_d, n)
494 write(log_buf,
'(A33,I6,F12.6)')
'tAMG resid:', lvl, val
495 call neko_log%message(log_buf)
501 type(tamg_hierarchy_t),
intent(inout) :: amg
502 integer :: i, j, k, l, nid, n
503 do j = 1, amg%lvl(1)%nnodes
504 do k = 1, amg%lvl(1)%nodes(j)%ndofs
505 nid = amg%lvl(1)%nodes(j)%dofs(k)
506 amg%lvl(1)%map_finest2lvl(nid) = amg%lvl(1)%nodes(j)%gid
509 n =
size(amg%lvl(1)%map_finest2lvl)
512 nid = amg%lvl(l-1)%map_finest2lvl(i)
513 do j = 1, amg%lvl(l)%nnodes
514 do k = 1, amg%lvl(l)%nodes(j)%ndofs
515 if (nid .eq. amg%lvl(l)%nodes(j)%dofs(k))
then
516 amg%lvl(l)%map_finest2lvl(i) = amg%lvl(l)%nodes(j)%gid
522 if (neko_bcknd_device .eq. 1)
then
524 amg%lvl(l)%map_finest2lvl(0) = n
525 call device_memcpy( amg%lvl(l)%map_finest2lvl, &
526 amg%lvl(l)%map_finest2lvl_d, n, &
527 host_to_device, .true.)
528 call device_memcpy( amg%lvl(l)%map_f2c, &
529 amg%lvl(l)%map_f2c_d, amg%lvl(l)%fine_lvl_dofs+1, &
530 host_to_device, .true.)
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Return the device pointer for an associated Fortran array.
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
Defines a Matrix-vector product.
type(mpi_comm), public neko_comm
MPI communicator.
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction .
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
real(kind=rp) function, public device_glsc2(a_d, b_d, n, strm)
Weighted inner product .
Device abstraction, common interface for various accelerators.
integer, parameter, public host_to_device
subroutine, public device_free(x_d)
Deallocate memory on the device.
type(log_t), public neko_log
Global log stream.
integer, parameter, public log_size
real(kind=rp) function, public glsc2(a, b, n)
Weighted inner product .
subroutine, public sub3(a, b, c, n)
Vector subtraction .
subroutine, public add2(a, b, n)
Vector addition .
subroutine, public col2(a, b, n)
Vector multiplication .
subroutine, public copy(a, b, n)
Copy a vector .
subroutine, public rzero(a, n)
Zero a real vector.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
subroutine, public profiler_start_region(name, region_id)
Started a named (name) profiler region.
subroutine, public profiler_end_region(name, region_id)
End the most recently started profiler region.
Defines a function space.
Implements an aggregation for TreeAMG hierarchy structure.
subroutine aggregate_end(tamg, lvl_id)
Aggregate all dofs to a single point to form a tree-like structure.
subroutine aggregate_pairs(tamg, lvl_id, max_aggs, facet_neigh, agg_nhbr)
Aggregates pairs of dofs based on adjacent dofs.
subroutine aggregate_greedy(tamg, lvl_id, max_aggs, facet_neigh, agg_nhbr)
Aggregates dofs based on adjacent dofs.
subroutine aggregate_finest_level(tamg, lx, ly, lz, ne)
Aggregaiton on finest level Aggregates all dofs in an element into a single aggregate.
Implements multigrid using the TreeAMG hierarchy structure. USE:
subroutine calc_resid(r, x, b, amg, lvl, n)
Wrapper function to calculate residyal.
subroutine tamg_mg_cycle_d(this, zero_initial_guess)
multigrid cycle for the TreeAMG solver object on device
subroutine tamg_mg_free(this)
free tree amg solver object
subroutine fill_lvl_map(amg)
Create index mapping between levels and directly to finest level.
subroutine print_resid_info(r, x, b, r_d, x_d, b_d, amg, lvl, n)
subroutine tamg_mg_solve(this, z, r, n)
Solver function for the TreeAMG solver object.
subroutine tamg_mg_cycle(this, zero_initial_guess)
multigrid cycle for the TreeAMG solver object
subroutine print_preagg_info(lvl, nagg, agg_type)
subroutine tamg_mg_init(this, ax, xh, coef, msh, gs_h, nlvls, blst, max_iter, cheby_degree)
Initialization of the TreeAMG multigrid solver.
Implements smoothers for use with TreeAMG matrix vector product.
Implements the base type for TreeAMG hierarchy structure.
subroutine, public tamg_node_init(node, gid, ndofs)
Initialization of a TreeAMG tree node.
subroutine, public tamg_lvl_init(tamg_lvl, lvl, nnodes, ndofs)
Initialization of a TreeAMG level.
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Base type for a matrix-vector product providing .
A list of allocatable `bc_t`. Follows the standard interface of lists.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
The function space for the SEM solution fields.
Type for a TreeAMG hierarchy.
Type for the TreeAMG solver.
Type for Chebyshev iteration using TreeAMG matvec.