51 use,
intrinsic :: iso_c_binding, only : c_ptr, c_null_ptr, &
52 c_associated, c_size_t, c_sizeof, c_int, c_loc
60 real(kind=
rp),
allocatable :: p(:)
61 real(kind=
rp),
allocatable :: q(:)
62 real(kind=
rp),
allocatable :: r(:)
63 real(kind=
rp),
allocatable :: s(:)
64 real(kind=
rp),
allocatable :: u(:,:)
65 real(kind=
rp),
allocatable :: w(:)
66 real(kind=
rp),
allocatable :: z(:)
67 real(kind=
rp),
allocatable :: mi(:)
68 real(kind=
rp),
allocatable :: ni(:)
69 real(kind=
rp),
allocatable :: alpha(:)
70 real(kind=
rp),
allocatable :: beta(:)
71 type(c_ptr) :: p_d = c_null_ptr
72 type(c_ptr) :: q_d = c_null_ptr
73 type(c_ptr) :: r_d = c_null_ptr
74 type(c_ptr) :: s_d = c_null_ptr
75 type(c_ptr) :: u_d_d = c_null_ptr
76 type(c_ptr) :: w_d = c_null_ptr
77 type(c_ptr) :: z_d = c_null_ptr
78 type(c_ptr) :: mi_d = c_null_ptr
79 type(c_ptr) :: ni_d = c_null_ptr
80 type(c_ptr) :: alpha_d = c_null_ptr
81 type(c_ptr) :: beta_d = c_null_ptr
82 type(c_ptr),
allocatable :: u_d(:)
83 type(c_ptr) :: gs_event = c_null_ptr
94 w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n) &
95 bind(c, name=
'cuda_pipecg_vecops')
96 use,
intrinsic :: iso_c_binding
99 type(c_ptr),
value :: p_d, q_d, r_d, s_d, u_d1, u_d2
100 type(c_ptr),
value :: w_d, ni_d, mi_d, z_d, mult_d
102 real(c_rp) :: alpha, beta, reduction(3)
109 bind(c, name=
'cuda_cg_update_xp')
110 use,
intrinsic :: iso_c_binding
112 type(c_ptr),
value :: x_d, p_d, u_d_d, alpha, beta
113 integer(c_int) :: p_cur, n, p_space
119 w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n) &
120 bind(c, name=
'hip_pipecg_vecops')
121 use,
intrinsic :: iso_c_binding
124 type(c_ptr),
value :: p_d, q_d, r_d, s_d, u_d1, u_d2
125 type(c_ptr),
value :: w_d, ni_d, mi_d, z_d, mult_d
127 real(c_rp) :: alpha, beta, reduction(3)
134 bind(c, name=
'hip_cg_update_xp')
135 use,
intrinsic :: iso_c_binding
137 type(c_ptr),
value :: x_d, p_d, u_d_d, alpha, beta
138 integer(c_int) :: p_cur, n, p_space
146 w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
147 type(c_ptr),
value :: p_d, q_d, r_d, s_d, u_d1, u_d2
148 type(c_ptr),
value :: w_d, ni_d, mi_d, z_d, mult_d
150 real(c_rp) :: alpha, beta, reduction(3)
153 s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
156 s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
158 call neko_error(
'No device backend configured')
163 use,
intrinsic :: iso_c_binding
164 type(c_ptr),
value :: x_d, p_d, u_d_d, alpha, beta
165 integer(c_int) :: p_cur, n, p_space
171 call neko_error(
'No device backend configured')
178 class(
pc_t),
optional,
intent(in),
target :: M
179 integer,
intent(in) :: n
180 integer,
intent(in) :: max_iter
181 real(kind=
rp),
optional,
intent(in) :: rel_tol
182 real(kind=
rp),
optional,
intent(in) :: abs_tol
183 logical,
optional,
intent(in) :: monitor
185 integer(c_size_t) :: u_size
218 this%u_d(i) = c_null_ptr
224 ptr = c_loc(this%u_d)
228 if (
present(rel_tol) .and.
present(abs_tol) .and.
present(monitor))
then
229 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
230 else if (
present(rel_tol) .and.
present(abs_tol))
then
231 call this%ksp_init(max_iter, rel_tol, abs_tol)
232 else if (
present(monitor) .and.
present(abs_tol))
then
233 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
234 else if (
present(rel_tol) .and.
present(monitor))
then
235 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
236 else if (
present(rel_tol))
then
237 call this%ksp_init(max_iter, rel_tol = rel_tol)
238 else if (
present(abs_tol))
then
239 call this%ksp_init(max_iter, abs_tol = abs_tol)
240 else if (
present(monitor))
then
241 call this%ksp_init(max_iter, monitor = monitor)
243 call this%ksp_init(max_iter)
257 if (
allocated(this%p))
then
260 if (
allocated(this%q))
then
263 if (
allocated(this%r))
then
266 if (
allocated(this%s))
then
269 if (
allocated(this%u))
then
272 if (
allocated(this%w))
then
275 if (
allocated(this%z))
then
278 if (
allocated(this%mi))
then
281 if (
allocated(this%ni))
then
284 if (
allocated(this%alpha))
then
285 deallocate(this%alpha)
287 if (
allocated(this%beta))
then
288 deallocate(this%beta)
292 if (c_associated(this%p_d))
then
295 if (c_associated(this%q_d))
then
298 if (c_associated(this%r_d))
then
301 if (c_associated(this%s_d))
then
304 if (c_associated(this%u_d_d))
then
307 if (c_associated(this%w_d))
then
310 if (c_associated(this%z_d))
then
313 if (c_associated(this%mi_d))
then
316 if (c_associated(this%ni_d))
then
319 if (c_associated(this%alpha_d))
then
322 if (c_associated(this%beta_d))
then
325 if (
allocated(this%u_d))
then
327 if (c_associated(this%u_d(i)))
then
335 if (c_associated(this%gs_event))
then
344 class(
ax_t),
intent(in) :: ax
345 type(
field_t),
intent(inout) :: x
346 integer,
intent(in) :: n
347 real(kind=
rp),
dimension(n),
intent(in) :: f
348 type(
coef_t),
intent(inout) :: coef
350 type(
gs_t),
intent(inout) :: gs_h
352 integer,
optional,
intent(in) :: niter
353 integer :: iter, max_iter, ierr, p_cur, p_prev, u_prev
354 real(kind=
rp) :: rnorm, rtr, reduction(3), norm_fac
355 real(kind=
rp) :: gamma1, gamma2, delta
356 real(kind=
rp) :: tmp1, tmp2, tmp3
357 type(mpi_request) :: request
358 type(mpi_status) :: status
362 if (
present(niter))
then
365 max_iter = this%max_iter
367 norm_fac = 1.0_rp / sqrt(coef%volume)
369 associate(p => this%p, q => this%q, r => this%r, s => this%s, &
370 u => this%u, w => this%w, z => this%z, mi => this%mi, ni => this%ni, &
371 alpha => this%alpha, beta => this%beta, &
372 alpha_d => this%alpha_d, beta_d => this%beta_d, &
373 p_d => this%p_d, q_d => this%q_d, r_d => this%r_d, &
374 s_d => this%s_d, u_d => this%u_d, u_d_d => this%u_d_d, &
375 w_d => this%w_d, z_d => this%z_d, mi_d => this%mi_d, ni_d => this%ni_d)
388 call this%M%solve(u(1,u_prev), r, n)
389 call ax%compute(w, u(1,u_prev), coef, x%msh, x%Xh)
390 call gs_h%op(w, n, gs_op_add, this%gs_event)
392 call blst%apply_scalar(w, n)
395 rnorm = sqrt(rtr)*norm_fac
396 ksp_results%res_start = rnorm
397 ksp_results%res_final = rnorm
399 if(
abscmp(rnorm, 0.0_rp))
return
412 call this%monitor_start(
'PipeCG')
413 do iter = 1, max_iter
414 call mpi_iallreduce(mpi_in_place, reduction, 3, &
417 call this%M%solve(mi, w, n)
418 call ax%compute(ni, mi, coef, x%msh, x%Xh)
419 call gs_h%op(ni, n, gs_op_add, this%gs_event)
421 call blst%apply(ni, n)
423 call mpi_wait(request, status, ierr)
425 gamma1 = reduction(1)
429 rnorm = sqrt(rtr)*norm_fac
430 call this%monitor_iter(iter, rnorm)
431 if (rnorm .lt. this%abs_tol)
exit
434 if (iter .gt. 1)
then
435 beta(p_cur) = gamma1 / gamma2
436 alpha(p_cur) = gamma1 / (delta - (beta(p_cur) * gamma1/alpha(p_prev)))
439 alpha(p_cur) = gamma1/delta
443 s_d, u_d(u_prev), u_d(p_cur),&
445 mi_d, alpha(p_cur), beta(p_cur),&
446 coef%mult_d, reduction, n)
456 alpha(1) = alpha(p_cur)
457 beta(1) = beta(p_cur)
466 if ( p_cur .ne. 1)
then
472 call this%monitor_stop()
473 ksp_results%res_final = rnorm
474 ksp_results%iter = iter
475 ksp_results%converged = this%is_converged(iter, rnorm)
483 n, coef, blstx, blsty, blstz, gs_h, niter)
result(ksp_results)
485 class(ax_t),
intent(in) :: ax
486 type(field_t),
intent(inout) :: x
487 type(field_t),
intent(inout) :: y
488 type(field_t),
intent(inout) :: z
489 integer,
intent(in) :: n
490 real(kind=rp),
dimension(n),
intent(in) :: fx
491 real(kind=rp),
dimension(n),
intent(in) :: fy
492 real(kind=rp),
dimension(n),
intent(in) :: fz
493 type(coef_t),
intent(inout) :: coef
494 type(bc_list_t),
intent(inout) :: blstx
495 type(bc_list_t),
intent(inout) :: blsty
496 type(bc_list_t),
intent(inout) :: blstz
497 type(gs_t),
intent(inout) :: gs_h
498 type(ksp_monitor_t),
dimension(3) :: ksp_results
499 integer,
optional,
intent(in) :: niter
501 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
502 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
503 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
Return the device pointer for an associated Fortran array.
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
Defines a Matrix-vector product.
type(mpi_comm) neko_comm
MPI communicator.
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
integer pe_size
MPI size of communicator.
subroutine, public device_rzero(a_d, n)
Zero a real vector.
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n)
Compute multiplication sum .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
Device abstraction, common interface for various accelerators.
subroutine, public device_event_sync(event)
Synchronize an event.
integer, parameter, public host_to_device
subroutine, public device_free(x_d)
Deallocate memory on the device.
subroutine, public device_event_destroy(event)
Destroy a device event.
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
subroutine, public device_event_create(event, flags)
Create a device event queue.
Implements the base abstract type for Krylov solvers plus helper types.
integer, parameter, public ksp_max_iter
Maximum number of iters.
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
subroutine, public copy(a, b, n)
Copy a vector .
subroutine, public rzero(a, n)
Zero a real vector.
integer, parameter, public c_rp
integer, parameter, public rp
Global precision used in computations.
Defines a pipelined Conjugate Gradient methods.
subroutine device_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction, n)
subroutine pipecg_device_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a pipelined PCG solver.
subroutine device_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n)
integer, parameter device_pipecg_p_space
type(ksp_monitor_t) function, dimension(3) pipecg_device_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Pipelined PCG coupled solve.
subroutine pipecg_device_free(this)
Deallocate a pipelined PCG solver.
type(ksp_monitor_t) function pipecg_device_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
Pipelined PCG solve.
void hip_cg_update_xp(void *x, void *p, void *u, void *alpha, void *beta, int *p_cur, int *p_space, int *n)
void hip_pipecg_vecops(void *p, void *q, void *r, void *s, void *u1, void *u2, void *w, void *z, void *ni, void *mi, real *alpha, real *beta, void *mult, real *reduction, int *n)
Base type for a matrix-vector product providing .
A list of allocatable `bc_t`. Follows the standard interface of lists.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Type for storing initial and final residuals in a Krylov solver.
Base abstract type for a canonical Krylov method, solving .
Pipelined preconditioned conjugate gradient method.
Defines a canonical Krylov preconditioner.