Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
pipecg_device.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
36 use precon, only : pc_t
37 use ax_product, only : ax_t
38 use num_types, only: rp, c_rp
39 use field, only : field_t
40 use coefs, only : coef_t
41 use gather_scatter, only : gs_t, gs_op_add
42 use bc_list, only : bc_list_t
43 use math, only : glsc3, rzero, copy, abscmp
46 use device
47 use utils, only : neko_error
48 use comm, only : neko_comm, pe_size, mpi_iallreduce, mpi_status, &
49 mpi_real_precision, mpi_sum, mpi_in_place, mpi_request, &
50 mpi_wait
51 use, intrinsic :: iso_c_binding, only : c_ptr, c_null_ptr, &
52 c_associated, c_size_t, c_sizeof, c_int, c_loc
53 implicit none
54 private
55
!> Number of (alpha, beta) coefficient pairs that are buffered on the host
!! before the accumulated solution update is pushed to the device
!! (see the flush in `pipecg_device_solve`).
integer, parameter :: device_pipecg_p_space = 10

!> Pipelined preconditioned conjugate gradient method (device version).
type, public, extends(ksp_t) :: pipecg_device_t
   ! Host-side work vectors; each one is mapped to the device pointer of
   ! the same name with a `_d` suffix in `pipecg_device_init`.
   real(kind=rp), allocatable, dimension(:) :: p, q, r, s
   ! One column per buffered step plus one extra column that carries the
   ! most recent preconditioned residual across a flush.
   real(kind=rp), allocatable, dimension(:,:) :: u
   ! w holds A*u, mi holds M^-1*w and ni holds A*mi inside the solver.
   real(kind=rp), allocatable, dimension(:) :: w, z, mi, ni
   ! Step coefficients, one slot per buffered iteration.
   real(kind=rp), allocatable, dimension(:) :: alpha, beta
   ! Device pointers associated with the host arrays above.
   type(c_ptr) :: p_d = c_null_ptr, q_d = c_null_ptr
   type(c_ptr) :: r_d = c_null_ptr, s_d = c_null_ptr
   ! Device-resident copy of the pointer table `u_d`.
   type(c_ptr) :: u_d_d = c_null_ptr
   type(c_ptr) :: w_d = c_null_ptr, z_d = c_null_ptr
   type(c_ptr) :: mi_d = c_null_ptr, ni_d = c_null_ptr
   type(c_ptr) :: alpha_d = c_null_ptr, beta_d = c_null_ptr
   ! Host-side table with one device pointer per column of `u`.
   type(c_ptr), allocatable, dimension(:) :: u_d
   ! Device event used to wait for the gather-scatter operation.
   type(c_ptr) :: gs_event = c_null_ptr
 contains
   !> Allocate and map all work storage.
   procedure, pass(this) :: init => pipecg_device_init
   !> Release host and device storage.
   procedure, pass(this) :: free => pipecg_device_free
   !> Solve a single scalar system.
   procedure, pass(this) :: solve => pipecg_device_solve
   !> Solve three scalar systems, one after the other.
   procedure, pass(this) :: solve_coupled => pipecg_device_solve_coupled
end type pipecg_device_t
90
#ifdef HAVE_CUDA
  ! C bindings to the CUDA implementations of the two PipeCG kernels.
  interface
     !> Per-iteration vector kernel. Vectors are passed as device
     !! pointers by value; `alpha`, `beta`, `n` and the three-entry
     !! `reduction` buffer are passed by reference. The solver reads
     !! `reduction` back after the call and all-reduces it.
     subroutine cuda_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, &
          w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction, n) &
          bind(c, name='cuda_pipecg_vecops')
       use, intrinsic :: iso_c_binding, only : c_ptr, c_int
       import :: c_rp
       implicit none
       type(c_ptr), value :: p_d, q_d, r_d, s_d
       type(c_ptr), value :: u_d1, u_d2
       type(c_ptr), value :: w_d, z_d, ni_d, mi_d, mult_d
       real(c_rp) :: alpha, beta
       real(c_rp) :: reduction(3)
       integer(c_int) :: n
     end subroutine cuda_pipecg_vecops

     !> Solution/search-direction update kernel. `u_d_d`, `alpha` and
     !! `beta` are device pointers; the integer counts go by reference.
     subroutine cuda_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, &
          p_cur, p_space, n) &
          bind(c, name='cuda_cg_update_xp')
       use, intrinsic :: iso_c_binding, only : c_ptr, c_int
       implicit none
       type(c_ptr), value :: x_d, p_d, u_d_d
       type(c_ptr), value :: alpha, beta
       integer(c_int) :: p_cur, p_space, n
     end subroutine cuda_cg_update_xp
  end interface
#elif HAVE_HIP
  ! C bindings to the HIP implementations of the two PipeCG kernels.
  interface
     !> Per-iteration vector kernel. Vectors are passed as device
     !! pointers by value; `alpha`, `beta`, `n` and the three-entry
     !! `reduction` buffer are passed by reference. The solver reads
     !! `reduction` back after the call and all-reduces it.
     subroutine hip_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, &
          w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction, n) &
          bind(c, name='hip_pipecg_vecops')
       use, intrinsic :: iso_c_binding, only : c_ptr, c_int
       import :: c_rp
       implicit none
       type(c_ptr), value :: p_d, q_d, r_d, s_d
       type(c_ptr), value :: u_d1, u_d2
       type(c_ptr), value :: w_d, z_d, ni_d, mi_d, mult_d
       real(c_rp) :: alpha, beta
       real(c_rp) :: reduction(3)
       integer(c_int) :: n
     end subroutine hip_pipecg_vecops

     !> Solution/search-direction update kernel. `u_d_d`, `alpha` and
     !! `beta` are device pointers; the integer counts go by reference.
     subroutine hip_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, &
          p_cur, p_space, n) &
          bind(c, name='hip_cg_update_xp')
       use, intrinsic :: iso_c_binding, only : c_ptr, c_int
       implicit none
       type(c_ptr), value :: x_d, p_d, u_d_d
       type(c_ptr), value :: alpha, beta
       integer(c_int) :: p_cur, p_space, n
     end subroutine hip_cg_update_xp
  end interface
#endif
142
143contains
144
!> Backend-independent entry point for the per-iteration PipeCG vector
!! kernel. Forwards all arguments unchanged to the HIP or CUDA binding
!! and aborts through `neko_error` when neither backend was compiled in.
subroutine device_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, &
     w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction, n)
  ! Device pointers to the work vectors and the multiplicity weights
  type(c_ptr), value :: p_d, q_d, r_d, s_d
  type(c_ptr), value :: u_d1, u_d2
  type(c_ptr), value :: w_d, z_d, ni_d, mi_d, mult_d
  ! Step coefficients for the current iteration
  real(c_rp) :: alpha, beta
  ! Three-entry buffer handed to the kernel; the caller all-reduces it
  real(c_rp) :: reduction(3)
  ! Vector length
  integer(c_int) :: n
#ifdef HAVE_HIP
  call hip_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, &
       w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction, n)
#elif HAVE_CUDA
  call cuda_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, &
       w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction, n)
#else
  call neko_error('No device backend configured')
#endif
end subroutine device_pipecg_vecops
161
!> Backend-independent entry point for the buffered solution update.
!! Forwards all arguments unchanged to the HIP or CUDA binding and aborts
!! through `neko_error` when neither backend was compiled in.
subroutine device_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n)
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
  ! Device pointers: solution, search direction, table of u-column pointers
  type(c_ptr), value :: x_d, p_d, u_d_d
  ! Device pointers to the buffered step coefficients
  type(c_ptr), value :: alpha, beta
  ! Number of buffered steps to apply, buffer capacity, vector length
  integer(c_int) :: p_cur, p_space, n
#ifdef HAVE_HIP
  call hip_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n)
#elif HAVE_CUDA
  call cuda_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n)
#else
  call neko_error('No device backend configured')
#endif
end subroutine device_cg_update_xp
174
!> Initialise a pipelined PCG solver.
!! Releases any previous state, allocates the host work arrays, maps them
!! to the device, uploads the table of `u` column pointers and initialises
!! the base Krylov solver.
!! @param n Length of every work vector.
!! @param max_iter Maximum number of iterations.
!! @param M Preconditioner (optional); the solver keeps a pointer to it.
!! @param rel_tol Relative tolerance (optional).
!! @param abs_tol Absolute tolerance (optional).
!! @param monitor Enable residual monitoring (optional).
subroutine pipecg_device_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
  class(pipecg_device_t), target, intent(inout) :: this
  class(pc_t), optional, intent(in), target :: M
  integer, intent(in) :: n
  integer, intent(in) :: max_iter
  real(kind=rp), optional, intent(in) :: rel_tol
  real(kind=rp), optional, intent(in) :: abs_tol
  logical, optional, intent(in) :: monitor
  type(c_ptr) :: ptr
  integer(c_size_t) :: u_size
  integer :: i

  call this%free()

  allocate(this%p(n))
  allocate(this%q(n))
  allocate(this%r(n))
  allocate(this%s(n))
  allocate(this%u(n, device_pipecg_p_space+1))
  ! Guard against a pointer table that survived an earlier init/free
  ! cycle, so that repeated initialisation cannot fail on allocate.
  if (allocated(this%u_d)) deallocate(this%u_d)
  allocate(this%u_d(device_pipecg_p_space+1))
  allocate(this%w(n))
  allocate(this%z(n))
  allocate(this%mi(n))
  allocate(this%ni(n))
  allocate(this%alpha(device_pipecg_p_space))
  allocate(this%beta(device_pipecg_p_space))

  if (present(M)) then
     this%M => M
  end if

  call device_map(this%p, this%p_d, n)
  call device_map(this%q, this%q_d, n)
  call device_map(this%r, this%r_d, n)
  call device_map(this%s, this%s_d, n)
  call device_map(this%w, this%w_d, n)
  call device_map(this%z, this%z_d, n)
  call device_map(this%mi, this%mi_d, n)
  call device_map(this%ni, this%ni_d, n)
  call device_map(this%alpha, this%alpha_d, device_pipecg_p_space)
  call device_map(this%beta, this%beta_d, device_pipecg_p_space)
  ! Every column of u gets its own device buffer.
  do i = 1, device_pipecg_p_space+1
     this%u_d(i) = c_null_ptr
     call device_map(this%u(:,i), this%u_d(i), n)
  end do

  ! Upload the table of column pointers. The byte count is derived from
  ! the actual size of a C pointer instead of a hard-coded 8.
  ptr = c_loc(this%u_d)
  u_size = c_sizeof(ptr) * int(device_pipecg_p_space + 1, c_size_t)
  call device_alloc(this%u_d_d, u_size)
  call device_memcpy(ptr, this%u_d_d, u_size, &
       host_to_device, sync=.false.)

  ! Absent optional arguments may be forwarded to optional dummies, so a
  ! single call covers every combination of rel_tol/abs_tol/monitor.
  call this%ksp_init(max_iter, rel_tol = rel_tol, abs_tol = abs_tol, &
       monitor = monitor)

  ! NOTE(review): the meaning of flag value 2 is defined by
  ! device_event_create and is not visible here.
  call device_event_create(this%gs_event, 2)

end subroutine pipecg_device_init
249
!> Deallocate a pipelined PCG solver.
!! Releases the base Krylov state, all host work arrays, every device
!! buffer mapped in `pipecg_device_init` and the gather-scatter event.
!! Safe to call on an object that was never initialised.
subroutine pipecg_device_free(this)
  class(pipecg_device_t), intent(inout) :: this
  integer :: i

  call this%ksp_free()

  ! Host-side work arrays
  if (allocated(this%p)) deallocate(this%p)
  if (allocated(this%q)) deallocate(this%q)
  if (allocated(this%r)) deallocate(this%r)
  if (allocated(this%s)) deallocate(this%s)
  if (allocated(this%u)) deallocate(this%u)
  if (allocated(this%w)) deallocate(this%w)
  if (allocated(this%z)) deallocate(this%z)
  if (allocated(this%mi)) deallocate(this%mi)
  if (allocated(this%ni)) deallocate(this%ni)
  if (allocated(this%alpha)) deallocate(this%alpha)
  if (allocated(this%beta)) deallocate(this%beta)

  ! Device buffers
  if (c_associated(this%p_d)) call device_free(this%p_d)
  if (c_associated(this%q_d)) call device_free(this%q_d)
  if (c_associated(this%r_d)) call device_free(this%r_d)
  if (c_associated(this%s_d)) call device_free(this%s_d)
  if (c_associated(this%u_d_d)) call device_free(this%u_d_d)
  if (c_associated(this%w_d)) call device_free(this%w_d)
  if (c_associated(this%z_d)) call device_free(this%z_d)
  if (c_associated(this%mi_d)) call device_free(this%mi_d)
  if (c_associated(this%ni_d)) call device_free(this%ni_d)
  if (c_associated(this%alpha_d)) call device_free(this%alpha_d)
  if (c_associated(this%beta_d)) call device_free(this%beta_d)

  ! init maps device_pipecg_p_space+1 columns of u, so walk the whole
  ! pointer table rather than only the first device_pipecg_p_space
  ! entries, then drop the table itself so init can allocate it again.
  if (allocated(this%u_d)) then
     do i = 1, size(this%u_d)
        if (c_associated(this%u_d(i))) then
           call device_free(this%u_d(i))
        end if
     end do
     deallocate(this%u_d)
  end if

  ! The preconditioner is not owned by the solver; only drop the pointer.
  nullify(this%M)

  if (c_associated(this%gs_event)) then
     call device_event_destroy(this%gs_event)
  end if

end subroutine pipecg_device_free
340
!> Pipelined PCG solve.
!! Starts from a zero initial guess (x is overwritten). In every iteration
!! the non-blocking all-reduce of the three scalar products is issued
!! first and only waited for after the preconditioner application, the
!! matrix-vector product and the gather-scatter have been done. Step
!! coefficients are buffered on the host and the solution update is
!! flushed to the device every `device_pipecg_p_space` iterations, plus
!! once after the loop for a partially filled buffer.
!! @param Ax Matrix-vector product operator.
!! @param x Solution field; zeroed on entry to the iteration.
!! @param f Right-hand side, must already be associated with a device array.
!! @param n Length of the vectors.
!! @param coef Coefficients (volume and multiplicity are used here).
!! @param blst Boundary conditions applied after each operator application.
!! @param gs_h Gather-scatter handle.
!! @param niter Optional iteration limit overriding `this%max_iter`.
!! @return Monitor record with start/final residual and iteration count.
function pipecg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
  class(pipecg_device_t), intent(inout) :: this
  class(ax_t), intent(in) :: ax
  type(field_t), intent(inout) :: x
  integer, intent(in) :: n
  real(kind=rp), dimension(n), intent(in) :: f
  type(coef_t), intent(inout) :: coef
  type(bc_list_t), intent(inout) :: blst
  type(gs_t), intent(inout) :: gs_h
  type(ksp_monitor_t) :: ksp_results
  integer, optional, intent(in) :: niter
  integer :: iter, max_iter, ierr, p_cur, p_prev, u_prev
  real(kind=rp) :: rnorm, rtr, reduction(3), norm_fac
  real(kind=rp) :: gamma1, gamma2, delta
  type(mpi_request) :: request
  type(mpi_status) :: status
  type(c_ptr) :: f_d
  f_d = device_get_ptr(f)

  if (present(niter)) then
     max_iter = niter
  else
     max_iter = this%max_iter
  end if
  norm_fac = 1.0_rp / sqrt(coef%volume)

  associate(p => this%p, q => this%q, r => this%r, s => this%s, &
       u => this%u, w => this%w, z => this%z, mi => this%mi, ni => this%ni, &
       alpha => this%alpha, beta => this%beta, &
       alpha_d => this%alpha_d, beta_d => this%beta_d, &
       p_d => this%p_d, q_d => this%q_d, r_d => this%r_d, &
       s_d => this%s_d, u_d => this%u_d, u_d_d => this%u_d_d, &
       w_d => this%w_d, z_d => this%z_d, mi_d => this%mi_d, ni_d => this%ni_d)

    ! Slot bookkeeping: p_cur is the coefficient slot being filled,
    ! p_prev the slot of the previous step and u_prev the column of u
    ! holding the most recent preconditioned residual.
    p_prev = device_pipecg_p_space
    u_prev = device_pipecg_p_space + 1
    p_cur = 1
    call device_rzero(x%x_d, n)
    call device_rzero(z_d, n)
    call device_rzero(q_d, n)
    call device_rzero(p_d, n)
    call device_rzero(s_d, n)
    call device_copy(r_d, f_d, n)
    ! u = M^-1 r, w = A u
    call this%M%solve(u(1,u_prev), r, n)
    call ax%compute(w, u(1,u_prev), coef, x%msh, x%Xh)
    call gs_h%op(w, n, gs_op_add, this%gs_event)
    call device_event_sync(this%gs_event)
    ! NOTE(review): apply_scalar is used here but apply inside the loop;
    ! confirm both bindings are intended.
    call blst%apply_scalar(w, n)

    rtr = device_glsc3(r_d, coef%mult_d, r_d, n)
    rnorm = sqrt(rtr)*norm_fac
    ksp_results%res_start = rnorm
    ksp_results%res_final = rnorm
    ksp_results%iter = 0
    ! NOTE(review): this early return leaves ksp_results%converged unset.
    if(abscmp(rnorm, 0.0_rp)) return

    gamma1 = 0.0_rp
    ! Local (not yet globally reduced) scalar products for the first step.
    reduction(1) = device_vlsc3(r_d, coef%mult_d, u_d(u_prev), n)
    reduction(2) = device_vlsc3(w_d, coef%mult_d, u_d(u_prev), n)
    reduction(3) = device_vlsc3(r_d, coef%mult_d, r_d, n)

    call this%monitor_start('PipeCG')
    do iter = 1, max_iter
       ! Start the global reduction, then do the work that hides it.
       call mpi_iallreduce(mpi_in_place, reduction, 3, &
            mpi_real_precision, mpi_sum, neko_comm, request, ierr)

       ! mi = M^-1 w, ni = A mi
       call this%M%solve(mi, w, n)
       call ax%compute(ni, mi, coef, x%msh, x%Xh)
       call gs_h%op(ni, n, gs_op_add, this%gs_event)
       call device_event_sync(this%gs_event)
       call blst%apply(ni, n)

       call mpi_wait(request, status, ierr)
       gamma2 = gamma1
       gamma1 = reduction(1)
       delta = reduction(2)
       rtr = reduction(3)

       rnorm = sqrt(rtr)*norm_fac
       call this%monitor_iter(iter, rnorm)
       if (rnorm .lt. this%abs_tol) exit

       if (iter .gt. 1) then
          beta(p_cur) = gamma1 / gamma2
          alpha(p_cur) = gamma1 / (delta - (beta(p_cur) * gamma1/alpha(p_prev)))
       else
          beta(p_cur) = 0.0_rp
          alpha(p_cur) = gamma1/delta
       end if

       ! The kernel refills `reduction` with the local sums that the next
       ! iteration all-reduces.
       call device_pipecg_vecops(p_d, q_d, r_d, &
            s_d, u_d(u_prev), u_d(p_cur), &
            w_d, z_d, ni_d, &
            mi_d, alpha(p_cur), beta(p_cur), &
            coef%mult_d, reduction, n)

       if (p_cur .eq. device_pipecg_p_space) then
          ! Coefficient buffer is full: flush the accumulated updates.
          call device_memcpy(alpha, alpha_d, p_cur, &
               host_to_device, sync=.false.)
          call device_memcpy(beta, beta_d, p_cur, &
               host_to_device, sync=.false.)
          call device_cg_update_xp(x%x_d, p_d, u_d_d, alpha_d, beta_d, p_cur, &
               device_pipecg_p_space, n)
          p_prev = p_cur
          u_prev = device_pipecg_p_space + 1
          alpha(1) = alpha(p_cur)
          beta(1) = beta(p_cur)
          p_cur = 1
       else
          u_prev = p_cur
          p_prev = p_cur
          p_cur = p_cur + 1
       end if
    end do

    ! Flush whatever is left in a partially filled buffer.
    ! NOTE(review): p_cur has already been advanced past the last slot
    ! written in the loop; confirm how the kernel interprets the count.
    if ( p_cur .ne. 1) then
       call device_memcpy(alpha, alpha_d, p_cur, host_to_device, sync=.false.)
       call device_memcpy(beta, beta_d, p_cur, host_to_device, sync=.false.)
       call device_cg_update_xp(x%x_d, p_d, u_d_d, alpha_d, beta_d, p_cur, &
            device_pipecg_p_space, n)
    end if
    call this%monitor_stop()
    ksp_results%res_final = rnorm
    ksp_results%iter = iter
    ksp_results%converged = this%is_converged(iter, rnorm)

  end associate

end function pipecg_device_solve
480
!> Pipelined PCG coupled solve.
!! The three components are not solved as one coupled system: the scalar
!! solver is simply run once per component, in the order x, y, z, each
!! with its own right-hand side and boundary condition list.
!! @return One monitor record per component, in the order x, y, z.
function pipecg_device_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
     n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
  class(pipecg_device_t), intent(inout) :: this
  class(ax_t), intent(in) :: ax
  type(field_t), intent(inout) :: x, y, z
  integer, intent(in) :: n
  real(kind=rp), dimension(n), intent(in) :: fx, fy, fz
  type(coef_t), intent(inout) :: coef
  type(bc_list_t), intent(inout) :: blstx, blsty, blstz
  type(gs_t), intent(inout) :: gs_h
  type(ksp_monitor_t), dimension(3) :: ksp_results
  integer, optional, intent(in) :: niter

  ! x component
  ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
  ! y component
  ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
  ! z component
  ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)

end function pipecg_device_solve_coupled
506
507end module pipecg_device
Return the device pointer for an associated Fortran array.
Definition device.F90:92
Map a Fortran array to a device (allocate and associate)
Definition device.F90:68
Copy data between host and device (or device and device)
Definition device.F90:62
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_comm) neko_comm
MPI communicator.
Definition comm.F90:16
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:23
integer pe_size
MPI size of communicator.
Definition comm.F90:31
subroutine, public device_rzero(a_d, n)
Zero a real vector.
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n)
Compute multiplication sum $\sum_i u_i v_i w_i$.
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product $a^T b c$.
subroutine, public device_copy(a_d, b_d, n)
Copy a vector $a = b$.
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1241
integer, parameter, public host_to_device
Definition device.F90:46
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:197
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1206
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:176
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1176
Defines a field.
Definition field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition krylov.f90:51
Definition math.f90:60
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product $a^T b c$.
Definition math.f90:894
subroutine, public copy(a, b, n)
Copy a vector $a = b$.
Definition math.f90:238
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:194
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a pipelined Conjugate Gradient method.
subroutine device_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction, n)
subroutine pipecg_device_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a pipelined PCG solver.
subroutine device_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n)
integer, parameter device_pipecg_p_space
type(ksp_monitor_t) function, dimension(3) pipecg_device_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Pipelined PCG coupled solve.
subroutine pipecg_device_free(this)
Deallocate a pipelined PCG solver.
type(ksp_monitor_t) function pipecg_device_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
Pipelined PCG solve.
Krylov preconditioner.
Definition precon.f90:34
Utilities.
Definition utils.f90:35
void hip_cg_update_xp(void *x, void *p, void *u, void *alpha, void *beta, int *p_cur, int *p_space, int *n)
void hip_pipecg_vecops(void *p, void *q, void *r, void *s, void *u1, void *u2, void *w, void *z, void *ni, void *mi, real *alpha, real *beta, void *mult, real *reduction, int *n)
Base type for a matrix-vector product providing $Ax$.
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:47
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving $Ax = f$.
Definition krylov.f90:68
Pipelined preconditioned conjugate gradient method.
Defines a canonical Krylov preconditioner.
Definition precon.f90:40