49 use mpi_f08,
only : mpi_iallreduce, mpi_status, &
50 mpi_sum, mpi_in_place, mpi_request, mpi_wait
51 use,
intrinsic :: iso_c_binding, only : c_ptr, c_null_ptr, &
52 c_associated, c_size_t, c_sizeof, c_int, c_loc
60 real(kind=
rp),
allocatable :: p(:)
61 real(kind=
rp),
allocatable :: q(:)
62 real(kind=
rp),
allocatable :: r(:)
63 real(kind=
rp),
allocatable :: s(:)
64 real(kind=
rp),
allocatable :: u(:,:)
65 real(kind=
rp),
allocatable :: w(:)
66 real(kind=
rp),
allocatable :: z(:)
67 real(kind=
rp),
allocatable :: mi(:)
68 real(kind=
rp),
allocatable :: ni(:)
69 real(kind=
rp),
allocatable :: alpha(:)
70 real(kind=
rp),
allocatable :: beta(:)
71 type(c_ptr) :: p_d = c_null_ptr
72 type(c_ptr) :: q_d = c_null_ptr
73 type(c_ptr) :: r_d = c_null_ptr
74 type(c_ptr) :: s_d = c_null_ptr
75 type(c_ptr) :: u_d_d = c_null_ptr
76 type(c_ptr) :: w_d = c_null_ptr
77 type(c_ptr) :: z_d = c_null_ptr
78 type(c_ptr) :: mi_d = c_null_ptr
79 type(c_ptr) :: ni_d = c_null_ptr
80 type(c_ptr) :: alpha_d = c_null_ptr
81 type(c_ptr) :: beta_d = c_null_ptr
82 type(c_ptr),
allocatable :: u_d(:)
83 type(c_ptr) :: gs_event = c_null_ptr
94 w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n) &
95 bind(c, name=
'cuda_pipecg_vecops')
96 use,
intrinsic :: iso_c_binding
99 type(c_ptr),
value :: p_d, q_d, r_d, s_d, u_d1, u_d2
100 type(c_ptr),
value :: w_d, ni_d, mi_d, z_d, mult_d
102 real(c_rp) :: alpha, beta, reduction(3)
109 bind(c, name=
'cuda_cg_update_xp')
110 use,
intrinsic :: iso_c_binding
112 type(c_ptr),
value :: x_d, p_d, u_d_d, alpha, beta
113 integer(c_int) :: p_cur, n, p_space
119 w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n) &
120 bind(c, name=
'hip_pipecg_vecops')
121 use,
intrinsic :: iso_c_binding
124 type(c_ptr),
value :: p_d, q_d, r_d, s_d, u_d1, u_d2
125 type(c_ptr),
value :: w_d, ni_d, mi_d, z_d, mult_d
127 real(c_rp) :: alpha, beta, reduction(3)
134 bind(c, name=
'hip_cg_update_xp')
135 use,
intrinsic :: iso_c_binding
137 type(c_ptr),
value :: x_d, p_d, u_d_d, alpha, beta
138 integer(c_int) :: p_cur, n, p_space
146 w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
147 type(c_ptr),
value :: p_d, q_d, r_d, s_d, u_d1, u_d2
148 type(c_ptr),
value :: w_d, ni_d, mi_d, z_d, mult_d
150 real(c_rp) :: alpha, beta, reduction(3)
153 s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
156 s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
158 call neko_error(
'No device backend configured')
163 use,
intrinsic :: iso_c_binding
164 type(c_ptr),
value :: x_d, p_d, u_d_d, alpha, beta
165 integer(c_int) :: p_cur, n, p_space
171 call neko_error(
'No device backend configured')
178 class(
pc_t),
optional,
intent(in),
target :: M
179 integer,
intent(in) :: n
180 integer,
intent(in) :: max_iter
181 real(kind=
rp),
optional,
intent(in) :: rel_tol
182 real(kind=
rp),
optional,
intent(in) :: abs_tol
183 logical,
optional,
intent(in) :: monitor
185 integer(c_size_t) :: u_size
218 this%u_d(i) = c_null_ptr
224 ptr = c_loc(this%u_d)
228 if (
present(rel_tol) .and.
present(abs_tol) .and.
present(monitor))
then
229 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
230 else if (
present(rel_tol) .and.
present(abs_tol))
then
231 call this%ksp_init(max_iter, rel_tol, abs_tol)
232 else if (
present(monitor) .and.
present(abs_tol))
then
233 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
234 else if (
present(rel_tol) .and.
present(monitor))
then
235 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
236 else if (
present(rel_tol))
then
237 call this%ksp_init(max_iter, rel_tol = rel_tol)
238 else if (
present(abs_tol))
then
239 call this%ksp_init(max_iter, abs_tol = abs_tol)
240 else if (
present(monitor))
then
241 call this%ksp_init(max_iter, monitor = monitor)
243 call this%ksp_init(max_iter)
257 if (
allocated(this%p))
then
258 if (c_associated(this%p_d))
then
263 if (
allocated(this%q))
then
264 if (c_associated(this%q_d))
then
269 if (
allocated(this%r))
then
270 if (c_associated(this%r_d))
then
275 if (
allocated(this%s))
then
276 if (c_associated(this%s_d))
then
281 if (
allocated(this%u))
then
282 if (
allocated(this%u_d))
then
284 if (c_associated(this%u_d(i)))
then
291 if (
allocated(this%w))
then
292 if (c_associated(this%w_d))
then
297 if (
allocated(this%z))
then
298 if (c_associated(this%z_d))
then
303 if (
allocated(this%mi))
then
304 if (c_associated(this%mi_d))
then
309 if (
allocated(this%ni))
then
310 if (c_associated(this%ni_d))
then
315 if (
allocated(this%alpha))
then
316 if (c_associated(this%alpha_d))
then
319 deallocate(this%alpha)
321 if (
allocated(this%beta))
then
322 if (c_associated(this%beta_d))
then
325 deallocate(this%beta)
328 if (c_associated(this%u_d_d))
then
334 if (c_associated(this%gs_event))
then
343 class(
ax_t),
intent(in) :: ax
344 type(
field_t),
intent(inout) :: x
345 integer,
intent(in) :: n
346 real(kind=
rp),
dimension(n),
intent(in) :: f
347 type(
coef_t),
intent(inout) :: coef
349 type(
gs_t),
intent(inout) :: gs_h
351 integer,
optional,
intent(in) :: niter
352 integer :: iter, max_iter, ierr, p_cur, p_prev, u_prev
353 real(kind=
rp) :: rnorm, rtr, reduction(3), norm_fac
354 real(kind=
rp) :: gamma1, gamma2, delta
355 real(kind=
rp) :: tmp1, tmp2, tmp3
356 type(mpi_request) :: request
357 type(mpi_status) :: status
361 if (
present(niter))
then
364 max_iter = this%max_iter
366 norm_fac = 1.0_rp / sqrt(coef%volume)
368 associate(p => this%p, q => this%q, r => this%r, s => this%s, &
369 u => this%u, w => this%w, z => this%z, mi => this%mi, ni => this%ni, &
370 alpha => this%alpha, beta => this%beta, &
371 alpha_d => this%alpha_d, beta_d => this%beta_d, &
372 p_d => this%p_d, q_d => this%q_d, r_d => this%r_d, &
373 s_d => this%s_d, u_d => this%u_d, u_d_d => this%u_d_d, &
374 w_d => this%w_d, z_d => this%z_d, mi_d => this%mi_d, ni_d => this%ni_d)
387 call this%M%solve(u(1,u_prev), r, n)
388 call ax%compute(w, u(1,u_prev), coef, x%msh, x%Xh)
389 call gs_h%op(w, n, gs_op_add, this%gs_event)
391 call blst%apply_scalar(w, n)
394 rnorm = sqrt(rtr)*norm_fac
395 ksp_results%res_start = rnorm
396 ksp_results%res_final = rnorm
398 if(
abscmp(rnorm, 0.0_rp))
then
399 ksp_results%converged = .true.
414 call this%monitor_start(
'PipeCG')
415 do iter = 1, max_iter
416 call mpi_iallreduce(mpi_in_place, reduction, 3, &
419 call this%M%solve(mi, w, n)
420 call ax%compute(ni, mi, coef, x%msh, x%Xh)
421 call gs_h%op(ni, n, gs_op_add, this%gs_event)
423 call blst%apply(ni, n)
425 call mpi_wait(request, status, ierr)
427 gamma1 = reduction(1)
431 rnorm = sqrt(rtr)*norm_fac
432 call this%monitor_iter(iter, rnorm)
433 if (rnorm .lt. this%abs_tol)
exit
436 if (iter .gt. 1)
then
437 beta(p_cur) = gamma1 / gamma2
438 alpha(p_cur) = gamma1 / (delta - (beta(p_cur) * gamma1/alpha(p_prev)))
441 alpha(p_cur) = gamma1/delta
445 s_d, u_d(u_prev), u_d(p_cur),&
447 mi_d, alpha(p_cur), beta(p_cur),&
448 coef%mult_d, reduction, n)
458 alpha(1) = alpha(p_cur)
459 beta(1) = beta(p_cur)
468 if ( p_cur .ne. 1)
then
474 call this%monitor_stop()
475 ksp_results%res_final = rnorm
476 ksp_results%iter = iter
477 ksp_results%converged = this%is_converged(iter, rnorm)
485 n, coef, blstx, blsty, blstz, gs_h, niter)
result(ksp_results)
487 class(ax_t),
intent(in) :: ax
488 type(field_t),
intent(inout) :: x
489 type(field_t),
intent(inout) :: y
490 type(field_t),
intent(inout) :: z
491 integer,
intent(in) :: n
492 real(kind=rp),
dimension(n),
intent(in) :: fx
493 real(kind=rp),
dimension(n),
intent(in) :: fy
494 real(kind=rp),
dimension(n),
intent(in) :: fz
495 type(coef_t),
intent(inout) :: coef
496 type(bc_list_t),
intent(inout) :: blstx
497 type(bc_list_t),
intent(inout) :: blsty
498 type(bc_list_t),
intent(inout) :: blstz
499 type(gs_t),
intent(inout) :: gs_h
500 type(ksp_monitor_t),
dimension(3) :: ksp_results
501 integer,
optional,
intent(in) :: niter
503 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
504 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
505 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Return the device pointer for an associated Fortran array.
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
Unmap a Fortran array from a device (deassociate and free)
Defines a Matrix-vector product.
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
integer, public pe_size
MPI size of communicator.
type(mpi_comm), public neko_comm
MPI communicator.
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n, strm)
Compute multiplication sum .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
Device abstraction, common interface for various accelerators.
subroutine, public device_event_sync(event)
Synchronize an event.
integer, parameter, public host_to_device
subroutine, public device_free(x_d)
Deallocate memory on the device.
subroutine, public device_event_destroy(event)
Destroy a device event.
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
subroutine, public device_event_create(event, flags)
Create a device event queue.
Implements the base abstract type for Krylov solvers plus helper types.
integer, parameter, public ksp_max_iter
Maximum number of iters.
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
subroutine, public copy(a, b, n)
Copy a vector .
subroutine, public rzero(a, n)
Zero a real vector.
integer, parameter, public c_rp
integer, parameter, public rp
Global precision used in computations.
Defines a pipelined Conjugate Gradient methods.
subroutine device_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction, n)
subroutine pipecg_device_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a pipelined PCG solver.
subroutine device_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n)
integer, parameter device_pipecg_p_space
type(ksp_monitor_t) function, dimension(3) pipecg_device_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Pipelined PCG coupled solve.
subroutine pipecg_device_free(this)
Deallocate a pipelined PCG solver.
type(ksp_monitor_t) function pipecg_device_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
Pipelined PCG solve.
void hip_cg_update_xp(void *x, void *p, void *u, void *alpha, void *beta, int *p_cur, int *p_space, int *n)
void hip_pipecg_vecops(void *p, void *q, void *r, void *s, void *u1, void *u2, void *w, void *z, void *ni, void *mi, real *alpha, real *beta, void *mult, real *reduction, int *n)
Base type for a matrix-vector product providing .
A list of allocatable `bc_t`. Follows the standard interface of lists.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Type for storing initial and final residuals in a Krylov solver.
Base abstract type for a canonical Krylov method, solving .
Pipelined preconditioned conjugate gradient method.
Defines a canonical Krylov preconditioner.