55 real(kind=
rp),
allocatable :: p(:)
56 real(kind=
rp),
allocatable :: q(:)
57 real(kind=
rp),
allocatable :: r(:)
58 real(kind=
rp),
allocatable :: s(:)
59 real(kind=
rp),
allocatable :: u(:,:)
60 real(kind=
rp),
allocatable :: w(:)
61 real(kind=
rp),
allocatable :: z(:)
62 real(kind=
rp),
allocatable :: mi(:)
63 real(kind=
rp),
allocatable :: ni(:)
64 real(kind=
rp),
allocatable :: alpha(:)
65 real(kind=
rp),
allocatable :: beta(:)
66 type(c_ptr) :: p_d = c_null_ptr
67 type(c_ptr) :: q_d = c_null_ptr
68 type(c_ptr) :: r_d = c_null_ptr
69 type(c_ptr) :: s_d = c_null_ptr
70 type(c_ptr) :: u_d_d = c_null_ptr
71 type(c_ptr) :: w_d = c_null_ptr
72 type(c_ptr) :: z_d = c_null_ptr
73 type(c_ptr) :: mi_d = c_null_ptr
74 type(c_ptr) :: ni_d = c_null_ptr
75 type(c_ptr) :: alpha_d = c_null_ptr
76 type(c_ptr) :: beta_d = c_null_ptr
77 type(c_ptr),
allocatable :: u_d(:)
78 type(c_ptr) :: gs_event = c_null_ptr
89 w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n) &
90 bind(c, name=
'cuda_pipecg_vecops')
91 use,
intrinsic :: iso_c_binding
94 type(c_ptr),
value :: p_d, q_d, r_d, s_d, u_d1, u_d2
95 type(c_ptr),
value :: w_d, ni_d, mi_d, z_d, mult_d
97 real(c_rp) :: alpha, beta, reduction(3)
104 bind(c, name=
'cuda_cg_update_xp')
105 use,
intrinsic :: iso_c_binding
107 type(c_ptr),
value :: x_d, p_d, u_d_d, alpha, beta
108 integer(c_int) :: p_cur, n, p_space
114 w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n) &
115 bind(c, name=
'hip_pipecg_vecops')
116 use,
intrinsic :: iso_c_binding
119 type(c_ptr),
value :: p_d, q_d, r_d, s_d, u_d1, u_d2
120 type(c_ptr),
value :: w_d, ni_d, mi_d, z_d, mult_d
122 real(c_rp) :: alpha, beta, reduction(3)
129 bind(c, name=
'hip_cg_update_xp')
130 use,
intrinsic :: iso_c_binding
132 type(c_ptr),
value :: x_d, p_d, u_d_d, alpha, beta
133 integer(c_int) :: p_cur, n, p_space
141 w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
142 type(c_ptr),
value :: p_d, q_d, r_d, s_d, u_d1, u_d2
143 type(c_ptr),
value :: w_d, ni_d, mi_d, z_d, mult_d
145 real(c_rp) :: alpha, beta, reduction(3)
148 s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
151 s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
153 call neko_error(
'No device backend configured')
158 use,
intrinsic :: iso_c_binding
159 type(c_ptr),
value :: x_d, p_d, u_d_d, alpha, beta
160 integer(c_int) :: p_cur, n, p_space
166 call neko_error(
'No device backend configured')
173 class(
pc_t),
optional,
intent(in),
target :: M
174 integer,
intent(in) :: n
175 integer,
intent(in) :: max_iter
176 real(kind=
rp),
optional,
intent(in) :: rel_tol
177 real(kind=
rp),
optional,
intent(in) :: abs_tol
178 logical,
optional,
intent(in) :: monitor
180 integer(c_size_t) :: u_size
213 this%u_d(i) = c_null_ptr
219 ptr = c_loc(this%u_d)
223 if (
present(rel_tol) .and.
present(abs_tol) .and.
present(monitor))
then
224 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
225 else if (
present(rel_tol) .and.
present(abs_tol))
then
226 call this%ksp_init(max_iter, rel_tol, abs_tol)
227 else if (
present(monitor) .and.
present(abs_tol))
then
228 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
229 else if (
present(rel_tol) .and.
present(monitor))
then
230 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
231 else if (
present(rel_tol))
then
232 call this%ksp_init(max_iter, rel_tol = rel_tol)
233 else if (
present(abs_tol))
then
234 call this%ksp_init(max_iter, abs_tol = abs_tol)
235 else if (
present(monitor))
then
236 call this%ksp_init(max_iter, monitor = monitor)
238 call this%ksp_init(max_iter)
252 if (
allocated(this%p))
then
255 if (
allocated(this%q))
then
258 if (
allocated(this%r))
then
261 if (
allocated(this%s))
then
264 if (
allocated(this%u))
then
267 if (
allocated(this%w))
then
270 if (
allocated(this%z))
then
273 if (
allocated(this%mi))
then
276 if (
allocated(this%ni))
then
279 if (
allocated(this%alpha))
then
280 deallocate(this%alpha)
282 if (
allocated(this%beta))
then
283 deallocate(this%beta)
287 if (c_associated(this%p_d))
then
290 if (c_associated(this%q_d))
then
293 if (c_associated(this%r_d))
then
296 if (c_associated(this%s_d))
then
299 if (c_associated(this%u_d_d))
then
302 if (c_associated(this%w_d))
then
305 if (c_associated(this%z_d))
then
308 if (c_associated(this%mi_d))
then
311 if (c_associated(this%ni_d))
then
314 if (c_associated(this%alpha_d))
then
317 if (c_associated(this%beta_d))
then
320 if (
allocated(this%u_d))
then
322 if (c_associated(this%u_d(i)))
then
330 if (c_associated(this%gs_event))
then
339 class(
ax_t),
intent(in) :: ax
340 type(
field_t),
intent(inout) :: x
341 integer,
intent(in) :: n
342 real(kind=
rp),
dimension(n),
intent(in) :: f
343 type(
coef_t),
intent(inout) :: coef
345 type(
gs_t),
intent(inout) :: gs_h
347 integer,
optional,
intent(in) :: niter
348 integer :: iter, max_iter, ierr, p_cur, p_prev, u_prev
349 real(kind=
rp) :: rnorm, rtr, reduction(3), norm_fac
350 real(kind=
rp) :: gamma1, gamma2, delta
351 real(kind=
rp) :: tmp1, tmp2, tmp3
352 type(mpi_request) :: request
353 type(mpi_status) :: status
357 if (
present(niter))
then
360 max_iter = this%max_iter
362 norm_fac = 1.0_rp / sqrt(coef%volume)
364 associate(p => this%p, q => this%q, r => this%r, s => this%s, &
365 u => this%u, w => this%w, z => this%z, mi => this%mi, ni => this%ni, &
366 alpha => this%alpha, beta => this%beta, &
367 alpha_d => this%alpha_d, beta_d => this%beta_d, &
368 p_d => this%p_d, q_d => this%q_d, r_d => this%r_d, &
369 s_d => this%s_d, u_d => this%u_d, u_d_d => this%u_d_d, &
370 w_d => this%w_d, z_d => this%z_d, mi_d => this%mi_d, ni_d => this%ni_d)
383 call this%M%solve(u(1,u_prev), r, n)
384 call ax%compute(w, u(1,u_prev), coef, x%msh, x%Xh)
385 call gs_h%op(w, n, gs_op_add, this%gs_event)
387 call blst%apply_scalar(w, n)
390 rnorm = sqrt(rtr)*norm_fac
391 ksp_results%res_start = rnorm
392 ksp_results%res_final = rnorm
394 if(
abscmp(rnorm, 0.0_rp))
return
407 call this%monitor_start(
'PipeCG')
408 do iter = 1, max_iter
409 call mpi_iallreduce(mpi_in_place, reduction, 3, &
412 call this%M%solve(mi, w, n)
413 call ax%compute(ni, mi, coef, x%msh, x%Xh)
414 call gs_h%op(ni, n, gs_op_add, this%gs_event)
416 call blst%apply(ni, n)
418 call mpi_wait(request, status, ierr)
420 gamma1 = reduction(1)
424 rnorm = sqrt(rtr)*norm_fac
425 call this%monitor_iter(iter, rnorm)
426 if (rnorm .lt. this%abs_tol)
exit
429 if (iter .gt. 1)
then
430 beta(p_cur) = gamma1 / gamma2
431 alpha(p_cur) = gamma1 / (delta - (beta(p_cur) * gamma1/alpha(p_prev)))
434 alpha(p_cur) = gamma1/delta
438 s_d, u_d(u_prev), u_d(p_cur),&
440 mi_d, alpha(p_cur), beta(p_cur),&
441 coef%mult_d, reduction, n)
451 alpha(1) = alpha(p_cur)
452 beta(1) = beta(p_cur)
461 if ( p_cur .ne. 1)
then
467 call this%monitor_stop()
468 ksp_results%res_final = rnorm
469 ksp_results%iter = iter
470 ksp_results%converged = this%is_converged(iter, rnorm)
478 n, coef, blstx, blsty, blstz, gs_h, niter)
result(ksp_results)
480 class(ax_t),
intent(in) :: ax
481 type(field_t),
intent(inout) :: x
482 type(field_t),
intent(inout) :: y
483 type(field_t),
intent(inout) :: z
484 integer,
intent(in) :: n
485 real(kind=rp),
dimension(n),
intent(in) :: fx
486 real(kind=rp),
dimension(n),
intent(in) :: fy
487 real(kind=rp),
dimension(n),
intent(in) :: fz
488 type(coef_t),
intent(inout) :: coef
489 type(bc_list_t),
intent(inout) :: blstx
490 type(bc_list_t),
intent(inout) :: blsty
491 type(bc_list_t),
intent(inout) :: blstz
492 type(gs_t),
intent(inout) :: gs_h
493 type(ksp_monitor_t),
dimension(3) :: ksp_results
494 integer,
optional,
intent(in) :: niter
496 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
497 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
498 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
Return the device pointer for an associated Fortran array.
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
Defines a Matrix-vector product.
type(mpi_comm) neko_comm
MPI communicator.
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
subroutine, public device_rzero(a_d, n)
Zero a real vector.
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n)
Compute multiplication sum .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
Device abstraction, common interface for various accelerators.
subroutine, public device_event_sync(event)
Synchronize an event.
integer, parameter, public host_to_device
subroutine, public device_free(x_d)
Deallocate memory on the device.
subroutine, public device_event_destroy(event)
Destroy a device event.
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
subroutine, public device_event_create(event, flags)
Create a device event queue.
Implements the base abstract type for Krylov solvers plus helper types.
integer, parameter, public ksp_max_iter
Maximum number of iters.
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
subroutine, public copy(a, b, n)
Copy a vector .
subroutine, public rzero(a, n)
Zero a real vector.
integer, parameter, public c_rp
integer, parameter, public rp
Global precision used in computations.
Defines a pipelined Conjugate Gradient methods.
subroutine device_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction, n)
subroutine pipecg_device_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a pipelined PCG solver.
subroutine device_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n)
integer, parameter device_pipecg_p_space
type(ksp_monitor_t) function, dimension(3) pipecg_device_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Pipelined PCG coupled solve.
subroutine pipecg_device_free(this)
Deallocate a pipelined PCG solver.
type(ksp_monitor_t) function pipecg_device_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
Pipelined PCG solve.
void hip_cg_update_xp(void *x, void *p, void *u, void *alpha, void *beta, int *p_cur, int *p_space, int *n)
void hip_pipecg_vecops(void *p, void *q, void *r, void *s, void *u1, void *u2, void *w, void *z, void *ni, void *mi, real *alpha, real *beta, void *mult, real *reduction, int *n)
Base type for a matrix-vector product providing .
A list of allocatable `bc_t`. Follows the standard interface of lists.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Type for storing initial and final residuals in a Krylov solver.
Base abstract type for a canonical Krylov method, solving .
Pipelined preconditioned conjugate gradient method.
Defines a canonical Krylov preconditioner.