Neko  0.8.99
A portable framework for high-order spectral element flow simulations
pipecg_device.F90
Go to the documentation of this file.
1 ! Copyright (c) 2021-2024, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
36  use precon, only : pc_t
37  use ax_product, only : ax_t
38  use num_types, only: rp, c_rp
39  use field, only : field_t
40  use coefs, only : coef_t
41  use gather_scatter, only : gs_t, gs_op_add
42  use bc, only : bc_list_t, bc_list_apply
43  use math, only : glsc3, rzero, copy, abscmp
44  use device_math, only : device_rzero, device_copy, &
46  use device
47  use comm
48  implicit none
49  private
50 
51  integer, parameter :: device_pipecg_p_space = 10
52 
54  type, public, extends(ksp_t) :: pipecg_device_t
55  real(kind=rp), allocatable :: p(:)
56  real(kind=rp), allocatable :: q(:)
57  real(kind=rp), allocatable :: r(:)
58  real(kind=rp), allocatable :: s(:)
59  real(kind=rp), allocatable :: u(:,:)
60  real(kind=rp), allocatable :: w(:)
61  real(kind=rp), allocatable :: z(:)
62  real(kind=rp), allocatable :: mi(:)
63  real(kind=rp), allocatable :: ni(:)
64  real(kind=rp), allocatable :: alpha(:)
65  real(kind=rp), allocatable :: beta(:)
66  type(c_ptr) :: p_d = c_null_ptr
67  type(c_ptr) :: q_d = c_null_ptr
68  type(c_ptr) :: r_d = c_null_ptr
69  type(c_ptr) :: s_d = c_null_ptr
70  type(c_ptr) :: u_d_d = c_null_ptr
71  type(c_ptr) :: w_d = c_null_ptr
72  type(c_ptr) :: z_d = c_null_ptr
73  type(c_ptr) :: mi_d = c_null_ptr
74  type(c_ptr) :: ni_d = c_null_ptr
75  type(c_ptr) :: alpha_d = c_null_ptr
76  type(c_ptr) :: beta_d = c_null_ptr
77  type(c_ptr), allocatable :: u_d(:)
78  type(c_ptr) :: gs_event = c_null_ptr
79  contains
80  procedure, pass(this) :: init => pipecg_device_init
81  procedure, pass(this) :: free => pipecg_device_free
82  procedure, pass(this) :: solve => pipecg_device_solve
83  procedure, pass(this) :: solve_coupled => pipecg_device_solve_coupled
84  end type pipecg_device_t
85 
86 #ifdef HAVE_CUDA
87  interface
88  subroutine cuda_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, &
89  w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n) &
90  bind(c, name='cuda_pipecg_vecops')
91  use, intrinsic :: iso_c_binding
92  import c_rp
93  implicit none
94  type(c_ptr), value :: p_d, q_d, r_d, s_d, u_d1, u_d2
95  type(c_ptr), value :: w_d, ni_d, mi_d, z_d, mult_d
96  integer(c_int) :: n
97  real(c_rp) :: alpha, beta, reduction(3)
98  end subroutine cuda_pipecg_vecops
99  end interface
100 
101  interface
102  subroutine cuda_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, &
103  p_cur, p_space, n) &
104  bind(c, name='cuda_cg_update_xp')
105  use, intrinsic :: iso_c_binding
106  implicit none
107  type(c_ptr), value :: x_d, p_d, u_d_d, alpha, beta
108  integer(c_int) :: p_cur, n, p_space
109  end subroutine cuda_cg_update_xp
110  end interface
111 #elif HAVE_HIP
112  interface
113  subroutine hip_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, &
114  w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n) &
115  bind(c, name='hip_pipecg_vecops')
116  use, intrinsic :: iso_c_binding
117  import c_rp
118  implicit none
119  type(c_ptr), value :: p_d, q_d, r_d, s_d, u_d1, u_d2
120  type(c_ptr), value :: w_d, ni_d, mi_d, z_d, mult_d
121  integer(c_int) :: n
122  real(c_rp) :: alpha, beta, reduction(3)
123  end subroutine hip_pipecg_vecops
124  end interface
125 
126  interface
127  subroutine hip_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, &
128  p_cur, p_space, n) &
129  bind(c, name='hip_cg_update_xp')
130  use, intrinsic :: iso_c_binding
131  implicit none
132  type(c_ptr), value :: x_d, p_d, u_d_d, alpha, beta
133  integer(c_int) :: p_cur, n, p_space
134  end subroutine hip_cg_update_xp
135  end interface
136 #endif
137 
138 contains
139 
140  subroutine device_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, &
141  w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
142  type(c_ptr), value :: p_d, q_d, r_d, s_d, u_d1, u_d2
143  type(c_ptr), value :: w_d, ni_d, mi_d, z_d, mult_d
144  integer(c_int) :: n
145  real(c_rp) :: alpha, beta, reduction(3)
146 #ifdef HAVE_HIP
147  call hip_pipecg_vecops(p_d, q_d, r_d,&
148  s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
149 #elif HAVE_CUDA
150  call cuda_pipecg_vecops(p_d, q_d, r_d,&
151  s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
152 #else
153  call neko_error('No device backend configured')
154 #endif
155  end subroutine device_pipecg_vecops
156 
157  subroutine device_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n)
158  use, intrinsic :: iso_c_binding
159  type(c_ptr), value :: x_d, p_d, u_d_d, alpha, beta
160  integer(c_int) :: p_cur, n, p_space
161 #ifdef HAVE_HIP
162  call hip_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n)
163 #elif HAVE_CUDA
164  call cuda_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n)
165 #else
166  call neko_error('No device backend configured')
167 #endif
168  end subroutine device_cg_update_xp
169 
171  subroutine pipecg_device_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
172  class(pipecg_device_t), target, intent(inout) :: this
173  class(pc_t), optional, intent(inout), target :: M
174  integer, intent(in) :: n
175  integer, intent(in) :: max_iter
176  real(kind=rp), optional, intent(inout) :: rel_tol
177  real(kind=rp), optional, intent(inout) :: abs_tol
178  logical, optional, intent(in) :: monitor
179  type(c_ptr) :: ptr
180  integer(c_size_t) :: u_size
181  integer :: i
182 
183  call this%free()
184 
185  allocate(this%p(n))
186  allocate(this%q(n))
187  allocate(this%r(n))
188  allocate(this%s(n))
189  allocate(this%u(n, device_pipecg_p_space+1))
190  allocate(this%u_d(device_pipecg_p_space+1))
191  allocate(this%w(n))
192  allocate(this%z(n))
193  allocate(this%mi(n))
194  allocate(this%ni(n))
195  allocate(this%alpha(device_pipecg_p_space))
196  allocate(this%beta(device_pipecg_p_space))
197 
198  if (present(m)) then
199  this%M => m
200  end if
201 
202  call device_map(this%p, this%p_d, n)
203  call device_map(this%q, this%q_d, n)
204  call device_map(this%r, this%r_d, n)
205  call device_map(this%s, this%s_d, n)
206  call device_map(this%w, this%w_d, n)
207  call device_map(this%z, this%z_d, n)
208  call device_map(this%mi, this%mi_d, n)
209  call device_map(this%ni, this%ni_d, n)
210  call device_map(this%alpha, this%alpha_d, device_pipecg_p_space)
211  call device_map(this%beta, this%beta_d, device_pipecg_p_space)
212  do i = 1, device_pipecg_p_space+1
213  this%u_d(i) = c_null_ptr
214  call device_map(this%u(:,i), this%u_d(i), n)
215  end do
216  !Did not work with 4 for some reason...
217  u_size = 8*(device_pipecg_p_space+1)
218  call device_alloc(this%u_d_d, u_size)
219  ptr = c_loc(this%u_d)
220  call device_memcpy(ptr,this%u_d_d, u_size, &
221  host_to_device, sync=.false.)
222 
223  if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
224  call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
225  else if (present(rel_tol) .and. present(abs_tol)) then
226  call this%ksp_init(max_iter, rel_tol, abs_tol)
227  else if (present(monitor) .and. present(abs_tol)) then
228  call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
229  else if (present(rel_tol) .and. present(monitor)) then
230  call this%ksp_init(max_iter, rel_tol, monitor = monitor)
231  else if (present(rel_tol)) then
232  call this%ksp_init(max_iter, rel_tol = rel_tol)
233  else if (present(abs_tol)) then
234  call this%ksp_init(max_iter, abs_tol = abs_tol)
235  else if (present(monitor)) then
236  call this%ksp_init(max_iter, monitor = monitor)
237  else
238  call this%ksp_init(max_iter)
239  end if
240 
241  call device_event_create(this%gs_event, 2)
242 
243  end subroutine pipecg_device_init
244 
246  subroutine pipecg_device_free(this)
247  class(pipecg_device_t), intent(inout) :: this
248  integer :: i
249 
250  call this%ksp_free()
251 
252  if (allocated(this%p)) then
253  deallocate(this%p)
254  end if
255  if (allocated(this%q)) then
256  deallocate(this%q)
257  end if
258  if (allocated(this%r)) then
259  deallocate(this%r)
260  end if
261  if (allocated(this%s)) then
262  deallocate(this%s)
263  end if
264  if (allocated(this%u)) then
265  deallocate(this%u)
266  end if
267  if (allocated(this%w)) then
268  deallocate(this%w)
269  end if
270  if (allocated(this%z)) then
271  deallocate(this%z)
272  end if
273  if (allocated(this%mi)) then
274  deallocate(this%mi)
275  end if
276  if (allocated(this%ni)) then
277  deallocate(this%ni)
278  end if
279  if (allocated(this%alpha)) then
280  deallocate(this%alpha)
281  end if
282  if (allocated(this%beta)) then
283  deallocate(this%beta)
284  end if
285 
286 
287  if (c_associated(this%p_d)) then
288  call device_free(this%p_d)
289  end if
290  if (c_associated(this%q_d)) then
291  call device_free(this%q_d)
292  end if
293  if (c_associated(this%r_d)) then
294  call device_free(this%r_d)
295  end if
296  if (c_associated(this%s_d)) then
297  call device_free(this%s_d)
298  end if
299  if (c_associated(this%u_d_d)) then
300  call device_free(this%u_d_d)
301  end if
302  if (c_associated(this%w_d)) then
303  call device_free(this%w_d)
304  end if
305  if (c_associated(this%z_d)) then
306  call device_free(this%z_d)
307  end if
308  if (c_associated(this%mi_d)) then
309  call device_free(this%mi_d)
310  end if
311  if (c_associated(this%ni_d)) then
312  call device_free(this%ni_d)
313  end if
314  if (c_associated(this%alpha_d)) then
315  call device_free(this%alpha_d)
316  end if
317  if (c_associated(this%beta_d)) then
318  call device_free(this%beta_d)
319  end if
320  if (allocated(this%u_d)) then
321  do i = 1, device_pipecg_p_space
322  if (c_associated(this%u_d(i))) then
323  call device_free(this%u_d(i))
324  end if
325  end do
326  end if
327 
328  nullify(this%M)
329 
330  if (c_associated(this%gs_event)) then
331  call device_event_destroy(this%gs_event)
332  end if
333 
334  end subroutine pipecg_device_free
335 
337  function pipecg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
338  class(pipecg_device_t), intent(inout) :: this
339  class(ax_t), intent(inout) :: ax
340  type(field_t), intent(inout) :: x
341  integer, intent(in) :: n
342  real(kind=rp), dimension(n), intent(inout) :: f
343  type(coef_t), intent(inout) :: coef
344  type(bc_list_t), intent(inout) :: blst
345  type(gs_t), intent(inout) :: gs_h
346  type(ksp_monitor_t) :: ksp_results
347  integer, optional, intent(in) :: niter
348  integer :: iter, max_iter, ierr, p_cur, p_prev, u_prev
349  real(kind=rp) :: rnorm, rtr, reduction(3), norm_fac
350  real(kind=rp) :: gamma1, gamma2, delta
351  real(kind=rp) :: tmp1, tmp2, tmp3
352  type(mpi_request) :: request
353  type(mpi_status) :: status
354  type(c_ptr) :: f_d
355  f_d = device_get_ptr(f)
356 
357  if (present(niter)) then
358  max_iter = niter
359  else
360  max_iter = this%max_iter
361  end if
362  norm_fac = 1.0_rp / sqrt(coef%volume)
363 
364  associate(p => this%p, q => this%q, r => this%r, s => this%s, &
365  u => this%u, w => this%w, z => this%z, mi => this%mi, ni => this%ni, &
366  alpha => this%alpha, beta => this%beta, &
367  alpha_d => this%alpha_d, beta_d => this%beta_d, &
368  p_d => this%p_d, q_d => this%q_d, r_d => this%r_d, &
369  s_d => this%s_d, u_d => this%u_d, u_d_d => this%u_d_d, &
370  w_d => this%w_d, z_d => this%z_d, mi_d => this%mi_d, ni_d => this%ni_d)
371 
372  p_prev = device_pipecg_p_space !this%p_space
373  u_prev = device_pipecg_p_space + 1 !this%p_space+1
374  p_cur = 1
375  call device_rzero(x%x_d, n)
376  call device_rzero(z_d, n)
377  call device_rzero(q_d, n)
378  call device_rzero(p_d, n)
379  call device_rzero(s_d, n)
380  call device_copy(r_d, f_d, n)
381  !apply u=M^-1r
382  !call device_copy(u_d(u_prev), r_d, n)
383  call this%M%solve(u(1,u_prev), r, n)
384  call ax%compute(w, u(1,u_prev), coef, x%msh, x%Xh)
385  call gs_h%op(w, n, gs_op_add, this%gs_event)
386  call device_event_sync(this%gs_event)
387  call bc_list_apply(blst, w, n)
388 
389  rtr = device_glsc3(r_d, coef%mult_d, r_d, n)
390  rnorm = sqrt(rtr)*norm_fac
391  ksp_results%res_start = rnorm
392  ksp_results%res_final = rnorm
393  ksp_results%iter = 0
394  if(abscmp(rnorm, 0.0_rp)) return
395 
396  gamma1 = 0.0_rp
397  tmp1 = 0.0_rp
398  tmp2 = 0.0_rp
399  tmp3 = 0.0_rp
400  tmp1 = device_vlsc3(r_d, coef%mult_d, u_d(u_prev), n)
401  tmp2 = device_vlsc3(w_d, coef%mult_d, u_d(u_prev), n)
402  tmp3 = device_vlsc3(r_d, coef%mult_d, r_d, n)
403  reduction(1) = tmp1
404  reduction(2) = tmp2
405  reduction(3) = tmp3
406 
407  call this%monitor_start('PipeCG')
408  do iter = 1, max_iter
409  call mpi_iallreduce(mpi_in_place, reduction, 3, &
410  mpi_real_precision, mpi_sum, neko_comm, request, ierr)
411 
412  call this%M%solve(mi, w, n)
413  call ax%compute(ni, mi, coef, x%msh, x%Xh)
414  call gs_h%op(ni, n, gs_op_add, this%gs_event)
415  call device_event_sync(this%gs_event)
416  call bc_list_apply(blst, ni, n)
417 
418  call mpi_wait(request, status, ierr)
419  gamma2 = gamma1
420  gamma1 = reduction(1)
421  delta = reduction(2)
422  rtr = reduction(3)
423 
424  rnorm = sqrt(rtr)*norm_fac
425  call this%monitor_iter(iter, rnorm)
426  if (rnorm .lt. this%abs_tol) exit
427 
428 
429  if (iter .gt. 1) then
430  beta(p_cur) = gamma1 / gamma2
431  alpha(p_cur) = gamma1 / (delta - (beta(p_cur) * gamma1/alpha(p_prev)))
432  else
433  beta(p_cur) = 0.0_rp
434  alpha(p_cur) = gamma1/delta
435  end if
436 
437  call device_pipecg_vecops(p_d, q_d, r_d,&
438  s_d, u_d(u_prev), u_d(p_cur),&
439  w_d, z_d, ni_d,&
440  mi_d, alpha(p_cur), beta(p_cur),&
441  coef%mult_d, reduction, n)
442  if (p_cur .eq. device_pipecg_p_space) then
443  call device_memcpy(alpha, alpha_d, p_cur, &
444  host_to_device, sync=.false.)
445  call device_memcpy(beta, beta_d, p_cur, &
446  host_to_device, sync=.false.)
447  call device_cg_update_xp(x%x_d, p_d, u_d_d, alpha_d, beta_d, p_cur, &
449  p_prev = p_cur
450  u_prev = device_pipecg_p_space + 1
451  alpha(1) = alpha(p_cur)
452  beta(1) = beta(p_cur)
453  p_cur = 1
454  else
455  u_prev = p_cur
456  p_prev = p_cur
457  p_cur = p_cur + 1
458  end if
459  end do
460 
461  if ( p_cur .ne. 1) then
462  call device_memcpy(alpha, alpha_d, p_cur, host_to_device, sync=.false.)
463  call device_memcpy(beta, beta_d, p_cur, host_to_device, sync=.false.)
464  call device_cg_update_xp(x%x_d, p_d, u_d_d, alpha_d, beta_d, p_cur, &
466  end if
467  call this%monitor_stop()
468  ksp_results%res_final = rnorm
469  ksp_results%iter = iter
470 
471  end associate
472 
473  end function pipecg_device_solve
474 
476  function pipecg_device_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
477  n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
478  class(pipecg_device_t), intent(inout) :: this
479  class(ax_t), intent(inout) :: ax
480  type(field_t), intent(inout) :: x
481  type(field_t), intent(inout) :: y
482  type(field_t), intent(inout) :: z
483  integer, intent(in) :: n
484  real(kind=rp), dimension(n), intent(inout) :: fx
485  real(kind=rp), dimension(n), intent(inout) :: fy
486  real(kind=rp), dimension(n), intent(inout) :: fz
487  type(coef_t), intent(inout) :: coef
488  type(bc_list_t), intent(inout) :: blstx
489  type(bc_list_t), intent(inout) :: blsty
490  type(bc_list_t), intent(inout) :: blstz
491  type(gs_t), intent(inout) :: gs_h
492  type(ksp_monitor_t), dimension(3) :: ksp_results
493  integer, optional, intent(in) :: niter
494 
495  ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
496  ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
497  ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
498 
499  end function pipecg_device_solve_coupled
500 
501 end module pipecg_device
502 
503 
Return the device pointer for an associated Fortran array.
Definition: device.F90:81
Map a Fortran array to a device (allocate and associate)
Definition: device.F90:57
Copy data between host and device (or device and device)
Definition: device.F90:51
Defines a Matrix-vector product.
Definition: ax.f90:34
Defines a boundary condition.
Definition: bc.f90:34
Coefficients.
Definition: coef.f90:34
Definition: comm.F90:1
type(mpi_comm) neko_comm
MPI communicator.
Definition: comm.F90:16
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
Definition: comm.F90:23
subroutine, public device_rzero(a_d, n)
Zero a real vector.
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n)
Compute multiplication sum .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
Definition: device_math.F90:76
Device abstraction, common interface for various accelerators.
Definition: device.F90:34
subroutine, public device_event_sync(event)
Synchronize an event.
Definition: device.F90:1229
integer, parameter, public host_to_device
Definition: device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition: device.F90:185
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition: device.F90:1194
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition: device.F90:164
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition: device.F90:1164
Defines a field.
Definition: field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition: krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition: krylov.f90:51
Definition: math.f90:60
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition: math.f90:895
subroutine, public copy(a, b, n)
Copy a vector .
Definition: math.f90:239
subroutine, public rzero(a, n)
Zero a real vector.
Definition: math.f90:195
integer, parameter, public c_rp
Definition: num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Defines a pipelined Conjugate Gradient methods.
subroutine device_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction, n)
subroutine pipecg_device_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
Initialise a pipelined PCG solver.
type(ksp_monitor_t) function pipecg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter)
Pipelined PCG solve.
subroutine device_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n)
integer, parameter device_pipecg_p_space
type(ksp_monitor_t) function, dimension(3) pipecg_device_solve_coupled(this, Ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Pipelined PCG coupled solve.
subroutine pipecg_device_free(this)
Deallocate a pipelined PCG solver.
Krylov preconditioner.
Definition: precon.f90:34
void hip_cg_update_xp(void *x, void *p, void *u, void *alpha, void *beta, int *p_cur, int *p_space, int *n)
Definition: pipecg_aux.hip:51
void hip_pipecg_vecops(void *p, void *q, void *r, void *s, void *u1, void *u2, void *w, void *z, void *ni, void *mi, real *alpha, real *beta, void *mult, real *reduction, int *n)
Definition: pipecg_aux.hip:64
Base type for a matrix-vector product providing .
Definition: ax.f90:43
A list of boundary conditions.
Definition: bc.f90:104
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition: coef.f90:55
Type for storing initial and final residuals in a Krylov solver.
Definition: krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition: krylov.f90:66
Pipelined preconditioned conjugate gradient method.
Defines a canonical Krylov preconditioner.
Definition: precon.f90:40