Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
pipecg_device.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
36 use precon, only : pc_t
37 use ax_product, only : ax_t
38 use num_types, only: rp, c_rp
39 use field, only : field_t
40 use coefs, only : coef_t
41 use gather_scatter, only : gs_t, gs_op_add
42 use bc_list, only : bc_list_t
43 use math, only : glsc3, rzero, copy, abscmp
46 use device
47 use comm
48 implicit none
49 private
50
51 integer, parameter :: device_pipecg_p_space = 10
52
54 type, public, extends(ksp_t) :: pipecg_device_t
55 real(kind=rp), allocatable :: p(:)
56 real(kind=rp), allocatable :: q(:)
57 real(kind=rp), allocatable :: r(:)
58 real(kind=rp), allocatable :: s(:)
59 real(kind=rp), allocatable :: u(:,:)
60 real(kind=rp), allocatable :: w(:)
61 real(kind=rp), allocatable :: z(:)
62 real(kind=rp), allocatable :: mi(:)
63 real(kind=rp), allocatable :: ni(:)
64 real(kind=rp), allocatable :: alpha(:)
65 real(kind=rp), allocatable :: beta(:)
66 type(c_ptr) :: p_d = c_null_ptr
67 type(c_ptr) :: q_d = c_null_ptr
68 type(c_ptr) :: r_d = c_null_ptr
69 type(c_ptr) :: s_d = c_null_ptr
70 type(c_ptr) :: u_d_d = c_null_ptr
71 type(c_ptr) :: w_d = c_null_ptr
72 type(c_ptr) :: z_d = c_null_ptr
73 type(c_ptr) :: mi_d = c_null_ptr
74 type(c_ptr) :: ni_d = c_null_ptr
75 type(c_ptr) :: alpha_d = c_null_ptr
76 type(c_ptr) :: beta_d = c_null_ptr
77 type(c_ptr), allocatable :: u_d(:)
78 type(c_ptr) :: gs_event = c_null_ptr
79 contains
80 procedure, pass(this) :: init => pipecg_device_init
81 procedure, pass(this) :: free => pipecg_device_free
82 procedure, pass(this) :: solve => pipecg_device_solve
83 procedure, pass(this) :: solve_coupled => pipecg_device_solve_coupled
84 end type pipecg_device_t
85
86#ifdef HAVE_CUDA
87 interface
88 subroutine cuda_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, &
89 w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n) &
90 bind(c, name='cuda_pipecg_vecops')
91 use, intrinsic :: iso_c_binding
92 import c_rp
93 implicit none
94 type(c_ptr), value :: p_d, q_d, r_d, s_d, u_d1, u_d2
95 type(c_ptr), value :: w_d, ni_d, mi_d, z_d, mult_d
96 integer(c_int) :: n
97 real(c_rp) :: alpha, beta, reduction(3)
98 end subroutine cuda_pipecg_vecops
99 end interface
100
101 interface
102 subroutine cuda_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, &
103 p_cur, p_space, n) &
104 bind(c, name='cuda_cg_update_xp')
105 use, intrinsic :: iso_c_binding
106 implicit none
107 type(c_ptr), value :: x_d, p_d, u_d_d, alpha, beta
108 integer(c_int) :: p_cur, n, p_space
109 end subroutine cuda_cg_update_xp
110 end interface
111#elif HAVE_HIP
112 interface
113 subroutine hip_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, &
114 w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n) &
115 bind(c, name='hip_pipecg_vecops')
116 use, intrinsic :: iso_c_binding
117 import c_rp
118 implicit none
119 type(c_ptr), value :: p_d, q_d, r_d, s_d, u_d1, u_d2
120 type(c_ptr), value :: w_d, ni_d, mi_d, z_d, mult_d
121 integer(c_int) :: n
122 real(c_rp) :: alpha, beta, reduction(3)
123 end subroutine hip_pipecg_vecops
124 end interface
125
126 interface
127 subroutine hip_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, &
128 p_cur, p_space, n) &
129 bind(c, name='hip_cg_update_xp')
130 use, intrinsic :: iso_c_binding
131 implicit none
132 type(c_ptr), value :: x_d, p_d, u_d_d, alpha, beta
133 integer(c_int) :: p_cur, n, p_space
134 end subroutine hip_cg_update_xp
135 end interface
136#endif
137
138contains
139
140 subroutine device_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, &
141 w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
142 type(c_ptr), value :: p_d, q_d, r_d, s_d, u_d1, u_d2
143 type(c_ptr), value :: w_d, ni_d, mi_d, z_d, mult_d
144 integer(c_int) :: n
145 real(c_rp) :: alpha, beta, reduction(3)
146#ifdef HAVE_HIP
147 call hip_pipecg_vecops(p_d, q_d, r_d,&
148 s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
149#elif HAVE_CUDA
150 call cuda_pipecg_vecops(p_d, q_d, r_d,&
151 s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
152#else
153 call neko_error('No device backend configured')
154#endif
155 end subroutine device_pipecg_vecops
156
157 subroutine device_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n)
158 use, intrinsic :: iso_c_binding
159 type(c_ptr), value :: x_d, p_d, u_d_d, alpha, beta
160 integer(c_int) :: p_cur, n, p_space
161#ifdef HAVE_HIP
162 call hip_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n)
163#elif HAVE_CUDA
164 call cuda_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n)
165#else
166 call neko_error('No device backend configured')
167#endif
168 end subroutine device_cg_update_xp
169
171 subroutine pipecg_device_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
172 class(pipecg_device_t), target, intent(inout) :: this
173 class(pc_t), optional, intent(in), target :: M
174 integer, intent(in) :: n
175 integer, intent(in) :: max_iter
176 real(kind=rp), optional, intent(in) :: rel_tol
177 real(kind=rp), optional, intent(in) :: abs_tol
178 logical, optional, intent(in) :: monitor
179 type(c_ptr) :: ptr
180 integer(c_size_t) :: u_size
181 integer :: i
182
183 call this%free()
184
185 allocate(this%p(n))
186 allocate(this%q(n))
187 allocate(this%r(n))
188 allocate(this%s(n))
189 allocate(this%u(n, device_pipecg_p_space+1))
190 allocate(this%u_d(device_pipecg_p_space+1))
191 allocate(this%w(n))
192 allocate(this%z(n))
193 allocate(this%mi(n))
194 allocate(this%ni(n))
195 allocate(this%alpha(device_pipecg_p_space))
196 allocate(this%beta(device_pipecg_p_space))
197
198 if (present(m)) then
199 this%M => m
200 end if
201
202 call device_map(this%p, this%p_d, n)
203 call device_map(this%q, this%q_d, n)
204 call device_map(this%r, this%r_d, n)
205 call device_map(this%s, this%s_d, n)
206 call device_map(this%w, this%w_d, n)
207 call device_map(this%z, this%z_d, n)
208 call device_map(this%mi, this%mi_d, n)
209 call device_map(this%ni, this%ni_d, n)
210 call device_map(this%alpha, this%alpha_d, device_pipecg_p_space)
211 call device_map(this%beta, this%beta_d, device_pipecg_p_space)
212 do i = 1, device_pipecg_p_space+1
213 this%u_d(i) = c_null_ptr
214 call device_map(this%u(:,i), this%u_d(i), n)
215 end do
216 !Did not work with 4 for some reason...
217 u_size = 8*(device_pipecg_p_space+1)
218 call device_alloc(this%u_d_d, u_size)
219 ptr = c_loc(this%u_d)
220 call device_memcpy(ptr,this%u_d_d, u_size, &
221 host_to_device, sync=.false.)
222
223 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
224 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
225 else if (present(rel_tol) .and. present(abs_tol)) then
226 call this%ksp_init(max_iter, rel_tol, abs_tol)
227 else if (present(monitor) .and. present(abs_tol)) then
228 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
229 else if (present(rel_tol) .and. present(monitor)) then
230 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
231 else if (present(rel_tol)) then
232 call this%ksp_init(max_iter, rel_tol = rel_tol)
233 else if (present(abs_tol)) then
234 call this%ksp_init(max_iter, abs_tol = abs_tol)
235 else if (present(monitor)) then
236 call this%ksp_init(max_iter, monitor = monitor)
237 else
238 call this%ksp_init(max_iter)
239 end if
240
241 call device_event_create(this%gs_event, 2)
242
243 end subroutine pipecg_device_init
244
246 subroutine pipecg_device_free(this)
247 class(pipecg_device_t), intent(inout) :: this
248 integer :: i
249
250 call this%ksp_free()
251
252 if (allocated(this%p)) then
253 deallocate(this%p)
254 end if
255 if (allocated(this%q)) then
256 deallocate(this%q)
257 end if
258 if (allocated(this%r)) then
259 deallocate(this%r)
260 end if
261 if (allocated(this%s)) then
262 deallocate(this%s)
263 end if
264 if (allocated(this%u)) then
265 deallocate(this%u)
266 end if
267 if (allocated(this%w)) then
268 deallocate(this%w)
269 end if
270 if (allocated(this%z)) then
271 deallocate(this%z)
272 end if
273 if (allocated(this%mi)) then
274 deallocate(this%mi)
275 end if
276 if (allocated(this%ni)) then
277 deallocate(this%ni)
278 end if
279 if (allocated(this%alpha)) then
280 deallocate(this%alpha)
281 end if
282 if (allocated(this%beta)) then
283 deallocate(this%beta)
284 end if
285
286
287 if (c_associated(this%p_d)) then
288 call device_free(this%p_d)
289 end if
290 if (c_associated(this%q_d)) then
291 call device_free(this%q_d)
292 end if
293 if (c_associated(this%r_d)) then
294 call device_free(this%r_d)
295 end if
296 if (c_associated(this%s_d)) then
297 call device_free(this%s_d)
298 end if
299 if (c_associated(this%u_d_d)) then
300 call device_free(this%u_d_d)
301 end if
302 if (c_associated(this%w_d)) then
303 call device_free(this%w_d)
304 end if
305 if (c_associated(this%z_d)) then
306 call device_free(this%z_d)
307 end if
308 if (c_associated(this%mi_d)) then
309 call device_free(this%mi_d)
310 end if
311 if (c_associated(this%ni_d)) then
312 call device_free(this%ni_d)
313 end if
314 if (c_associated(this%alpha_d)) then
315 call device_free(this%alpha_d)
316 end if
317 if (c_associated(this%beta_d)) then
318 call device_free(this%beta_d)
319 end if
320 if (allocated(this%u_d)) then
321 do i = 1, device_pipecg_p_space
322 if (c_associated(this%u_d(i))) then
323 call device_free(this%u_d(i))
324 end if
325 end do
326 end if
327
328 nullify(this%M)
329
330 if (c_associated(this%gs_event)) then
331 call device_event_destroy(this%gs_event)
332 end if
333
334 end subroutine pipecg_device_free
335
337 function pipecg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
338 class(pipecg_device_t), intent(inout) :: this
339 class(ax_t), intent(in) :: ax
340 type(field_t), intent(inout) :: x
341 integer, intent(in) :: n
342 real(kind=rp), dimension(n), intent(in) :: f
343 type(coef_t), intent(inout) :: coef
344 type(bc_list_t), intent(inout) :: blst
345 type(gs_t), intent(inout) :: gs_h
346 type(ksp_monitor_t) :: ksp_results
347 integer, optional, intent(in) :: niter
348 integer :: iter, max_iter, ierr, p_cur, p_prev, u_prev
349 real(kind=rp) :: rnorm, rtr, reduction(3), norm_fac
350 real(kind=rp) :: gamma1, gamma2, delta
351 real(kind=rp) :: tmp1, tmp2, tmp3
352 type(mpi_request) :: request
353 type(mpi_status) :: status
354 type(c_ptr) :: f_d
355 f_d = device_get_ptr(f)
356
357 if (present(niter)) then
358 max_iter = niter
359 else
360 max_iter = this%max_iter
361 end if
362 norm_fac = 1.0_rp / sqrt(coef%volume)
363
364 associate(p => this%p, q => this%q, r => this%r, s => this%s, &
365 u => this%u, w => this%w, z => this%z, mi => this%mi, ni => this%ni, &
366 alpha => this%alpha, beta => this%beta, &
367 alpha_d => this%alpha_d, beta_d => this%beta_d, &
368 p_d => this%p_d, q_d => this%q_d, r_d => this%r_d, &
369 s_d => this%s_d, u_d => this%u_d, u_d_d => this%u_d_d, &
370 w_d => this%w_d, z_d => this%z_d, mi_d => this%mi_d, ni_d => this%ni_d)
371
372 p_prev = device_pipecg_p_space !this%p_space
373 u_prev = device_pipecg_p_space + 1 !this%p_space+1
374 p_cur = 1
375 call device_rzero(x%x_d, n)
376 call device_rzero(z_d, n)
377 call device_rzero(q_d, n)
378 call device_rzero(p_d, n)
379 call device_rzero(s_d, n)
380 call device_copy(r_d, f_d, n)
381 !apply u=M^-1r
382 !call device_copy(u_d(u_prev), r_d, n)
383 call this%M%solve(u(1,u_prev), r, n)
384 call ax%compute(w, u(1,u_prev), coef, x%msh, x%Xh)
385 call gs_h%op(w, n, gs_op_add, this%gs_event)
386 call device_event_sync(this%gs_event)
387 call blst%apply_scalar(w, n)
388
389 rtr = device_glsc3(r_d, coef%mult_d, r_d, n)
390 rnorm = sqrt(rtr)*norm_fac
391 ksp_results%res_start = rnorm
392 ksp_results%res_final = rnorm
393 ksp_results%iter = 0
394 if(abscmp(rnorm, 0.0_rp)) return
395
396 gamma1 = 0.0_rp
397 tmp1 = 0.0_rp
398 tmp2 = 0.0_rp
399 tmp3 = 0.0_rp
400 tmp1 = device_vlsc3(r_d, coef%mult_d, u_d(u_prev), n)
401 tmp2 = device_vlsc3(w_d, coef%mult_d, u_d(u_prev), n)
402 tmp3 = device_vlsc3(r_d, coef%mult_d, r_d, n)
403 reduction(1) = tmp1
404 reduction(2) = tmp2
405 reduction(3) = tmp3
406
407 call this%monitor_start('PipeCG')
408 do iter = 1, max_iter
409 call mpi_iallreduce(mpi_in_place, reduction, 3, &
410 mpi_real_precision, mpi_sum, neko_comm, request, ierr)
411
412 call this%M%solve(mi, w, n)
413 call ax%compute(ni, mi, coef, x%msh, x%Xh)
414 call gs_h%op(ni, n, gs_op_add, this%gs_event)
415 call device_event_sync(this%gs_event)
416 call blst%apply(ni, n)
417
418 call mpi_wait(request, status, ierr)
419 gamma2 = gamma1
420 gamma1 = reduction(1)
421 delta = reduction(2)
422 rtr = reduction(3)
423
424 rnorm = sqrt(rtr)*norm_fac
425 call this%monitor_iter(iter, rnorm)
426 if (rnorm .lt. this%abs_tol) exit
427
428
429 if (iter .gt. 1) then
430 beta(p_cur) = gamma1 / gamma2
431 alpha(p_cur) = gamma1 / (delta - (beta(p_cur) * gamma1/alpha(p_prev)))
432 else
433 beta(p_cur) = 0.0_rp
434 alpha(p_cur) = gamma1/delta
435 end if
436
437 call device_pipecg_vecops(p_d, q_d, r_d,&
438 s_d, u_d(u_prev), u_d(p_cur),&
439 w_d, z_d, ni_d,&
440 mi_d, alpha(p_cur), beta(p_cur),&
441 coef%mult_d, reduction, n)
442 if (p_cur .eq. device_pipecg_p_space) then
443 call device_memcpy(alpha, alpha_d, p_cur, &
444 host_to_device, sync=.false.)
445 call device_memcpy(beta, beta_d, p_cur, &
446 host_to_device, sync=.false.)
447 call device_cg_update_xp(x%x_d, p_d, u_d_d, alpha_d, beta_d, p_cur, &
449 p_prev = p_cur
450 u_prev = device_pipecg_p_space + 1
451 alpha(1) = alpha(p_cur)
452 beta(1) = beta(p_cur)
453 p_cur = 1
454 else
455 u_prev = p_cur
456 p_prev = p_cur
457 p_cur = p_cur + 1
458 end if
459 end do
460
461 if ( p_cur .ne. 1) then
462 call device_memcpy(alpha, alpha_d, p_cur, host_to_device, sync=.false.)
463 call device_memcpy(beta, beta_d, p_cur, host_to_device, sync=.false.)
464 call device_cg_update_xp(x%x_d, p_d, u_d_d, alpha_d, beta_d, p_cur, &
466 end if
467 call this%monitor_stop()
468 ksp_results%res_final = rnorm
469 ksp_results%iter = iter
470 ksp_results%converged = this%is_converged(iter, rnorm)
471
472 end associate
473
474 end function pipecg_device_solve
475
477 function pipecg_device_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
478 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
479 class(pipecg_device_t), intent(inout) :: this
480 class(ax_t), intent(in) :: ax
481 type(field_t), intent(inout) :: x
482 type(field_t), intent(inout) :: y
483 type(field_t), intent(inout) :: z
484 integer, intent(in) :: n
485 real(kind=rp), dimension(n), intent(in) :: fx
486 real(kind=rp), dimension(n), intent(in) :: fy
487 real(kind=rp), dimension(n), intent(in) :: fz
488 type(coef_t), intent(inout) :: coef
489 type(bc_list_t), intent(inout) :: blstx
490 type(bc_list_t), intent(inout) :: blsty
491 type(bc_list_t), intent(inout) :: blstz
492 type(gs_t), intent(inout) :: gs_h
493 type(ksp_monitor_t), dimension(3) :: ksp_results
494 integer, optional, intent(in) :: niter
495
496 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
497 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
498 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
499
500 end function pipecg_device_solve_coupled
501
502end module pipecg_device
503
504
Return the device pointer for an associated Fortran array.
Definition device.F90:81
Map a Fortran array to a device (allocate and associate)
Definition device.F90:57
Copy data between host and device (or device and device)
Definition device.F90:51
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_comm) neko_comm
MPI communicator.
Definition comm.F90:16
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:23
subroutine, public device_rzero(a_d, n)
Zero a real vector.
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n)
Compute multiplication sum .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1229
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:185
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1194
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:164
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1164
Defines a field.
Definition field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition krylov.f90:51
Definition math.f90:60
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:894
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:238
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:194
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a pipelined Conjugate Gradient methods.
subroutine device_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction, n)
subroutine pipecg_device_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a pipelined PCG solver.
subroutine device_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n)
integer, parameter device_pipecg_p_space
type(ksp_monitor_t) function, dimension(3) pipecg_device_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Pipelined PCG coupled solve.
subroutine pipecg_device_free(this)
Deallocate a pipelined PCG solver.
type(ksp_monitor_t) function pipecg_device_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
Pipelined PCG solve.
Krylov preconditioner.
Definition precon.f90:34
void hip_cg_update_xp(void *x, void *p, void *u, void *alpha, void *beta, int *p_cur, int *p_space, int *n)
void hip_pipecg_vecops(void *p, void *q, void *r, void *s, void *u1, void *u2, void *w, void *z, void *ni, void *mi, real *alpha, real *beta, void *mult, real *reduction, int *n)
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:46
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:68
Pipelined preconditioned conjugate gradient method.
Defines a canonical Krylov preconditioner.
Definition precon.f90:40