55 real(kind=
rp),
allocatable :: p(:)
56 real(kind=
rp),
allocatable :: q(:)
57 real(kind=
rp),
allocatable :: r(:)
58 real(kind=
rp),
allocatable :: s(:)
59 real(kind=
rp),
allocatable :: u(:,:)
60 real(kind=
rp),
allocatable :: w(:)
61 real(kind=
rp),
allocatable :: z(:)
62 real(kind=
rp),
allocatable :: mi(:)
63 real(kind=
rp),
allocatable :: ni(:)
64 real(kind=
rp),
allocatable :: alpha(:)
65 real(kind=
rp),
allocatable :: beta(:)
66 type(c_ptr) :: p_d = c_null_ptr
67 type(c_ptr) :: q_d = c_null_ptr
68 type(c_ptr) :: r_d = c_null_ptr
69 type(c_ptr) :: s_d = c_null_ptr
70 type(c_ptr) :: u_d_d = c_null_ptr
71 type(c_ptr) :: w_d = c_null_ptr
72 type(c_ptr) :: z_d = c_null_ptr
73 type(c_ptr) :: mi_d = c_null_ptr
74 type(c_ptr) :: ni_d = c_null_ptr
75 type(c_ptr) :: alpha_d = c_null_ptr
76 type(c_ptr) :: beta_d = c_null_ptr
77 type(c_ptr),
allocatable :: u_d(:)
78 type(c_ptr) :: gs_event = c_null_ptr
88 w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n) &
89 bind(c, name=
'cuda_pipecg_vecops')
90 use,
intrinsic :: iso_c_binding
93 type(c_ptr),
value :: p_d, q_d, r_d, s_d, u_d1, u_d2
94 type(c_ptr),
value :: w_d, ni_d, mi_d, z_d, mult_d
96 real(c_rp) :: alpha, beta, reduction(3)
103 bind(c, name=
'cuda_cg_update_xp')
104 use,
intrinsic :: iso_c_binding
106 type(c_ptr),
value :: x_d, p_d, u_d_d, alpha, beta
107 integer(c_int) :: p_cur, n, p_space
113 w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n) &
114 bind(c, name=
'hip_pipecg_vecops')
115 use,
intrinsic :: iso_c_binding
118 type(c_ptr),
value :: p_d, q_d, r_d, s_d, u_d1, u_d2
119 type(c_ptr),
value :: w_d, ni_d, mi_d, z_d, mult_d
121 real(c_rp) :: alpha, beta, reduction(3)
128 bind(c, name=
'hip_cg_update_xp')
129 use,
intrinsic :: iso_c_binding
131 type(c_ptr),
value :: x_d, p_d, u_d_d, alpha, beta
132 integer(c_int) :: p_cur, n, p_space
140 w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
141 type(c_ptr),
value :: p_d, q_d, r_d, s_d, u_d1, u_d2
142 type(c_ptr),
value :: w_d, ni_d, mi_d, z_d, mult_d
144 real(c_rp) :: alpha, beta, reduction(3)
147 s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
150 s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
152 call neko_error(
'No device backend configured')
157 use,
intrinsic :: iso_c_binding
158 type(c_ptr),
value :: x_d, p_d, u_d_d, alpha, beta
159 integer(c_int) :: p_cur, n, p_space
165 call neko_error(
'No device backend configured')
172 class(
pc_t),
optional,
intent(inout),
target :: M
173 integer,
intent(in) :: n
174 integer,
intent(in) :: max_iter
175 real(kind=
rp),
optional,
intent(inout) :: rel_tol
176 real(kind=
rp),
optional,
intent(inout) :: abs_tol
178 integer(c_size_t) :: u_size
211 this%u_d(i) = c_null_ptr
217 ptr = c_loc(this%u_d)
221 if (
present(rel_tol) .and.
present(abs_tol))
then
222 call this%ksp_init(max_iter, rel_tol, abs_tol)
223 else if (
present(rel_tol))
then
224 call this%ksp_init(max_iter, rel_tol=rel_tol)
225 else if (
present(abs_tol))
then
226 call this%ksp_init(max_iter, abs_tol=abs_tol)
228 call this%ksp_init(max_iter)
242 if (
allocated(this%p))
then
245 if (
allocated(this%q))
then
248 if (
allocated(this%r))
then
251 if (
allocated(this%s))
then
254 if (
allocated(this%u))
then
257 if (
allocated(this%w))
then
260 if (
allocated(this%z))
then
263 if (
allocated(this%mi))
then
266 if (
allocated(this%ni))
then
269 if (
allocated(this%alpha))
then
270 deallocate(this%alpha)
272 if (
allocated(this%beta))
then
273 deallocate(this%beta)
277 if (c_associated(this%p_d))
then
280 if (c_associated(this%q_d))
then
283 if (c_associated(this%r_d))
then
286 if (c_associated(this%s_d))
then
289 if (c_associated(this%u_d_d))
then
292 if (c_associated(this%w_d))
then
295 if (c_associated(this%z_d))
then
298 if (c_associated(this%mi_d))
then
301 if (c_associated(this%ni_d))
then
304 if (c_associated(this%alpha_d))
then
307 if (c_associated(this%beta_d))
then
310 if (
allocated(this%u_d))
then
312 if (c_associated(this%u_d(i)))
then
320 if (c_associated(this%gs_event))
then
329 class(
ax_t),
intent(inout) :: ax
330 type(
field_t),
intent(inout) :: x
331 integer,
intent(in) :: n
332 real(kind=
rp),
dimension(n),
intent(inout) :: f
333 type(
coef_t),
intent(inout) :: coef
335 type(
gs_t),
intent(inout) :: gs_h
337 integer,
optional,
intent(in) :: niter
338 integer :: iter, max_iter, ierr, p_cur, p_prev, u_prev
339 real(kind=
rp) :: rnorm, rtr, reduction(3), norm_fac
340 real(kind=
rp) :: gamma1, gamma2, delta
341 real(kind=
rp) :: tmp1, tmp2, tmp3
342 type(mpi_request) :: request
343 type(mpi_status) :: status
347 if (
present(niter))
then
350 max_iter = this%max_iter
352 norm_fac = 1.0_rp / sqrt(coef%volume)
354 associate(p => this%p, q => this%q, r => this%r, s => this%s, &
355 u => this%u, w => this%w, z => this%z, mi => this%mi, ni => this%ni, &
356 alpha => this%alpha, beta => this%beta, &
357 alpha_d => this%alpha_d, beta_d => this%beta_d, &
358 p_d => this%p_d, q_d => this%q_d, r_d => this%r_d, &
359 s_d => this%s_d, u_d => this%u_d, u_d_d => this%u_d_d, &
360 w_d => this%w_d, z_d => this%z_d, mi_d => this%mi_d, ni_d => this%ni_d)
373 call this%M%solve(u(1,u_prev), r, n)
374 call ax%compute(w, u(1,u_prev), coef, x%msh, x%Xh)
375 call gs_h%op(w, n, gs_op_add, this%gs_event)
380 rnorm = sqrt(rtr)*norm_fac
381 ksp_results%res_start = rnorm
382 ksp_results%res_final = rnorm
384 if(
abscmp(rnorm, 0.0_rp))
return
397 do iter = 1, max_iter
398 call mpi_iallreduce(mpi_in_place, reduction, 3, &
401 call this%M%solve(mi, w, n)
402 call ax%compute(ni, mi, coef, x%msh, x%Xh)
403 call gs_h%op(ni, n, gs_op_add, this%gs_event)
407 call mpi_wait(request, status, ierr)
409 gamma1 = reduction(1)
413 rnorm = sqrt(rtr)*norm_fac
414 if (rnorm .lt. this%abs_tol)
exit
417 if (iter .gt. 1)
then
418 beta(p_cur) = gamma1 / gamma2
419 alpha(p_cur) = gamma1 / (delta - (beta(p_cur) * gamma1/alpha(p_prev)))
422 alpha(p_cur) = gamma1/delta
426 s_d, u_d(u_prev), u_d(p_cur),&
428 mi_d, alpha(p_cur), beta(p_cur),&
429 coef%mult_d, reduction, n)
439 alpha(1) = alpha(p_cur)
440 beta(1) = beta(p_cur)
449 if ( p_cur .ne. 1)
then
456 ksp_results%res_final = rnorm
457 ksp_results%iter = iter
Return the device pointer for an associated Fortran array.
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
Defines a Matrix-vector product.
Defines a boundary condition.
type(mpi_comm) neko_comm
MPI communicator.
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
subroutine, public device_rzero(a_d, n)
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n)
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
subroutine, public device_copy(a_d, b_d, n)
Device abstraction, common interface for various accelerators.
subroutine, public device_event_sync(event)
Synchronize an event.
integer, parameter, public host_to_device
subroutine, public device_free(x_d)
Deallocate memory on the device.
subroutine, public device_event_destroy(event)
Destroy a device event.
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
subroutine, public device_event_create(event, flags)
Create a device event queue.
Implements the base abstract type for Krylov solvers plus helper types.
integer, parameter, public ksp_max_iter
Maximum number of iters.
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
subroutine, public copy(a, b, n)
Copy a vector .
subroutine, public rzero(a, n)
Zero a real vector.
integer, parameter, public c_rp
integer, parameter, public rp
Global precision used in computations.
Defines a pipelined Conjugate Gradient methods.
subroutine device_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction, n)
type(ksp_monitor_t) function pipecg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter)
Pipelined PCG solve.
subroutine device_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n)
integer, parameter device_pipecg_p_space
subroutine pipecg_device_init(this, n, max_iter, M, rel_tol, abs_tol)
Initialise a pipelined PCG solver.
subroutine pipecg_device_free(this)
Deallocate a pipelined PCG solver.
void hip_cg_update_xp(void *x, void *p, void *u, void *alpha, void *beta, int *p_cur, int *p_space, int *n)
void hip_pipecg_vecops(void *p, void *q, void *r, void *s, void *u1, void *u2, void *w, void *z, void *ni, void *mi, real *alpha, real *beta, void *mult, real *reduction, int *n)
Base type for a matrix-vector product providing .
A list of boundary conditions.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Type for storing initial and final residuals in a Krylov solver.
Base abstract type for a canonical Krylov method, solving .
Pipelined preconditioned conjugate gradient method.
Defines a canonical Krylov preconditioner.