Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
pipecg_device.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
36 use precon, only : pc_t
37 use ax_product, only : ax_t
38 use num_types, only: rp, c_rp
39 use field, only : field_t
40 use coefs, only : coef_t
41 use gather_scatter, only : gs_t, gs_op_add
42 use bc_list, only : bc_list_t
43 use math, only : glsc3, rzero, copy, abscmp
46 use device
47 use utils, only : neko_error
49 use mpi_f08, only : mpi_iallreduce, mpi_status, &
50 mpi_sum, mpi_in_place, mpi_request, mpi_wait
51 use, intrinsic :: iso_c_binding, only : c_ptr, c_null_ptr, &
52 c_associated, c_size_t, c_sizeof, c_int, c_loc
53 implicit none
54 private
55
56 integer, parameter :: device_pipecg_p_space = 10
57
59 type, public, extends(ksp_t) :: pipecg_device_t
60 real(kind=rp), allocatable :: p(:)
61 real(kind=rp), allocatable :: q(:)
62 real(kind=rp), allocatable :: r(:)
63 real(kind=rp), allocatable :: s(:)
64 real(kind=rp), allocatable :: u(:,:)
65 real(kind=rp), allocatable :: w(:)
66 real(kind=rp), allocatable :: z(:)
67 real(kind=rp), allocatable :: mi(:)
68 real(kind=rp), allocatable :: ni(:)
69 real(kind=rp), allocatable :: alpha(:)
70 real(kind=rp), allocatable :: beta(:)
71 type(c_ptr) :: p_d = c_null_ptr
72 type(c_ptr) :: q_d = c_null_ptr
73 type(c_ptr) :: r_d = c_null_ptr
74 type(c_ptr) :: s_d = c_null_ptr
75 type(c_ptr) :: u_d_d = c_null_ptr
76 type(c_ptr) :: w_d = c_null_ptr
77 type(c_ptr) :: z_d = c_null_ptr
78 type(c_ptr) :: mi_d = c_null_ptr
79 type(c_ptr) :: ni_d = c_null_ptr
80 type(c_ptr) :: alpha_d = c_null_ptr
81 type(c_ptr) :: beta_d = c_null_ptr
82 type(c_ptr), allocatable :: u_d(:)
83 type(c_ptr) :: gs_event = c_null_ptr
84 contains
85 procedure, pass(this) :: init => pipecg_device_init
86 procedure, pass(this) :: free => pipecg_device_free
87 procedure, pass(this) :: solve => pipecg_device_solve
88 procedure, pass(this) :: solve_coupled => pipecg_device_solve_coupled
89 end type pipecg_device_t
90
91#ifdef HAVE_CUDA
92 interface
93 subroutine cuda_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, &
94 w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n) &
95 bind(c, name='cuda_pipecg_vecops')
96 use, intrinsic :: iso_c_binding
97 import c_rp
98 implicit none
99 type(c_ptr), value :: p_d, q_d, r_d, s_d, u_d1, u_d2
100 type(c_ptr), value :: w_d, ni_d, mi_d, z_d, mult_d
101 integer(c_int) :: n
102 real(c_rp) :: alpha, beta, reduction(3)
103 end subroutine cuda_pipecg_vecops
104 end interface
105
106 interface
107 subroutine cuda_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, &
108 p_cur, p_space, n) &
109 bind(c, name='cuda_cg_update_xp')
110 use, intrinsic :: iso_c_binding
111 implicit none
112 type(c_ptr), value :: x_d, p_d, u_d_d, alpha, beta
113 integer(c_int) :: p_cur, n, p_space
114 end subroutine cuda_cg_update_xp
115 end interface
116#elif HAVE_HIP
117 interface
118 subroutine hip_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, &
119 w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n) &
120 bind(c, name='hip_pipecg_vecops')
121 use, intrinsic :: iso_c_binding
122 import c_rp
123 implicit none
124 type(c_ptr), value :: p_d, q_d, r_d, s_d, u_d1, u_d2
125 type(c_ptr), value :: w_d, ni_d, mi_d, z_d, mult_d
126 integer(c_int) :: n
127 real(c_rp) :: alpha, beta, reduction(3)
128 end subroutine hip_pipecg_vecops
129 end interface
130
131 interface
132 subroutine hip_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, &
133 p_cur, p_space, n) &
134 bind(c, name='hip_cg_update_xp')
135 use, intrinsic :: iso_c_binding
136 implicit none
137 type(c_ptr), value :: x_d, p_d, u_d_d, alpha, beta
138 integer(c_int) :: p_cur, n, p_space
139 end subroutine hip_cg_update_xp
140 end interface
141#endif
142
143contains
144
145 subroutine device_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, &
146 w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
147 type(c_ptr), value :: p_d, q_d, r_d, s_d, u_d1, u_d2
148 type(c_ptr), value :: w_d, ni_d, mi_d, z_d, mult_d
149 integer(c_int) :: n
150 real(c_rp) :: alpha, beta, reduction(3)
151#ifdef HAVE_HIP
152 call hip_pipecg_vecops(p_d, q_d, r_d,&
153 s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
154#elif HAVE_CUDA
155 call cuda_pipecg_vecops(p_d, q_d, r_d,&
156 s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
157#else
158 call neko_error('No device backend configured')
159#endif
160 end subroutine device_pipecg_vecops
161
162 subroutine device_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n)
163 use, intrinsic :: iso_c_binding
164 type(c_ptr), value :: x_d, p_d, u_d_d, alpha, beta
165 integer(c_int) :: p_cur, n, p_space
166#ifdef HAVE_HIP
167 call hip_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n)
168#elif HAVE_CUDA
169 call cuda_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n)
170#else
171 call neko_error('No device backend configured')
172#endif
173 end subroutine device_cg_update_xp
174
176 subroutine pipecg_device_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
177 class(pipecg_device_t), target, intent(inout) :: this
178 class(pc_t), optional, intent(in), target :: M
179 integer, intent(in) :: n
180 integer, intent(in) :: max_iter
181 real(kind=rp), optional, intent(in) :: rel_tol
182 real(kind=rp), optional, intent(in) :: abs_tol
183 logical, optional, intent(in) :: monitor
184 type(c_ptr) :: ptr
185 integer(c_size_t) :: u_size
186 integer :: i
187
188 call this%free()
189
190 allocate(this%p(n))
191 allocate(this%q(n))
192 allocate(this%r(n))
193 allocate(this%s(n))
194 allocate(this%u(n, device_pipecg_p_space+1))
195 allocate(this%u_d(device_pipecg_p_space+1))
196 allocate(this%w(n))
197 allocate(this%z(n))
198 allocate(this%mi(n))
199 allocate(this%ni(n))
200 allocate(this%alpha(device_pipecg_p_space))
201 allocate(this%beta(device_pipecg_p_space))
202
203 if (present(m)) then
204 this%M => m
205 end if
206
207 call device_map(this%p, this%p_d, n)
208 call device_map(this%q, this%q_d, n)
209 call device_map(this%r, this%r_d, n)
210 call device_map(this%s, this%s_d, n)
211 call device_map(this%w, this%w_d, n)
212 call device_map(this%z, this%z_d, n)
213 call device_map(this%mi, this%mi_d, n)
214 call device_map(this%ni, this%ni_d, n)
215 call device_map(this%alpha, this%alpha_d, device_pipecg_p_space)
216 call device_map(this%beta, this%beta_d, device_pipecg_p_space)
217 do i = 1, device_pipecg_p_space+1
218 this%u_d(i) = c_null_ptr
219 call device_map(this%u(:,i), this%u_d(i), n)
220 end do
221 !Did not work with 4 for some reason...
222 u_size = 8*(device_pipecg_p_space+1)
223 call device_alloc(this%u_d_d, u_size)
224 ptr = c_loc(this%u_d)
225 call device_memcpy(ptr,this%u_d_d, u_size, &
226 host_to_device, sync=.false.)
227
228 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
229 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
230 else if (present(rel_tol) .and. present(abs_tol)) then
231 call this%ksp_init(max_iter, rel_tol, abs_tol)
232 else if (present(monitor) .and. present(abs_tol)) then
233 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
234 else if (present(rel_tol) .and. present(monitor)) then
235 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
236 else if (present(rel_tol)) then
237 call this%ksp_init(max_iter, rel_tol = rel_tol)
238 else if (present(abs_tol)) then
239 call this%ksp_init(max_iter, abs_tol = abs_tol)
240 else if (present(monitor)) then
241 call this%ksp_init(max_iter, monitor = monitor)
242 else
243 call this%ksp_init(max_iter)
244 end if
245
246 call device_event_create(this%gs_event, 2)
247
248 end subroutine pipecg_device_init
249
251 subroutine pipecg_device_free(this)
252 class(pipecg_device_t), intent(inout) :: this
253 integer :: i
254
255 call this%ksp_free()
256
257 if (allocated(this%p)) then
258 if (c_associated(this%p_d)) then
259 call device_unmap(this%p, this%p_d)
260 end if
261 deallocate(this%p)
262 end if
263 if (allocated(this%q)) then
264 if (c_associated(this%q_d)) then
265 call device_unmap(this%q, this%q_d)
266 end if
267 deallocate(this%q)
268 end if
269 if (allocated(this%r)) then
270 if (c_associated(this%r_d)) then
271 call device_unmap(this%r, this%r_d)
272 end if
273 deallocate(this%r)
274 end if
275 if (allocated(this%s)) then
276 if (c_associated(this%s_d)) then
277 call device_unmap(this%s, this%s_d)
278 end if
279 deallocate(this%s)
280 end if
281 if (allocated(this%u)) then
282 if (allocated(this%u_d)) then
283 do i = 1, device_pipecg_p_space
284 if (c_associated(this%u_d(i))) then
285 call device_unmap(this%u(:,i), this%u_d(i))
286 end if
287 end do
288 end if
289 deallocate(this%u)
290 end if
291 if (allocated(this%w)) then
292 if (c_associated(this%w_d)) then
293 call device_unmap(this%w, this%w_d)
294 end if
295 deallocate(this%w)
296 end if
297 if (allocated(this%z)) then
298 if (c_associated(this%z_d)) then
299 call device_unmap(this%z, this%z_d)
300 end if
301 deallocate(this%z)
302 end if
303 if (allocated(this%mi)) then
304 if (c_associated(this%mi_d)) then
305 call device_unmap(this%mi, this%mi_d)
306 end if
307 deallocate(this%mi)
308 end if
309 if (allocated(this%ni)) then
310 if (c_associated(this%ni_d)) then
311 call device_unmap(this%ni, this%ni_d)
312 end if
313 deallocate(this%ni)
314 end if
315 if (allocated(this%alpha)) then
316 if (c_associated(this%alpha_d)) then
317 call device_unmap(this%alpha, this%alpha_d)
318 end if
319 deallocate(this%alpha)
320 end if
321 if (allocated(this%beta)) then
322 if (c_associated(this%beta_d)) then
323 call device_unmap(this%beta, this%beta_d)
324 end if
325 deallocate(this%beta)
326 end if
327
328 if (c_associated(this%u_d_d)) then
329 call device_free(this%u_d_d)
330 end if
331
332 nullify(this%M)
333
334 if (c_associated(this%gs_event)) then
335 call device_event_destroy(this%gs_event)
336 end if
337
338 end subroutine pipecg_device_free
339
341 function pipecg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
342 class(pipecg_device_t), intent(inout) :: this
343 class(ax_t), intent(in) :: ax
344 type(field_t), intent(inout) :: x
345 integer, intent(in) :: n
346 real(kind=rp), dimension(n), intent(in) :: f
347 type(coef_t), intent(inout) :: coef
348 type(bc_list_t), intent(inout) :: blst
349 type(gs_t), intent(inout) :: gs_h
350 type(ksp_monitor_t) :: ksp_results
351 integer, optional, intent(in) :: niter
352 integer :: iter, max_iter, ierr, p_cur, p_prev, u_prev
353 real(kind=rp) :: rnorm, rtr, reduction(3), norm_fac
354 real(kind=rp) :: gamma1, gamma2, delta
355 real(kind=rp) :: tmp1, tmp2, tmp3
356 type(mpi_request) :: request
357 type(mpi_status) :: status
358 type(c_ptr) :: f_d
359 f_d = device_get_ptr(f)
360
361 if (present(niter)) then
362 max_iter = niter
363 else
364 max_iter = this%max_iter
365 end if
366 norm_fac = 1.0_rp / sqrt(coef%volume)
367
368 associate(p => this%p, q => this%q, r => this%r, s => this%s, &
369 u => this%u, w => this%w, z => this%z, mi => this%mi, ni => this%ni, &
370 alpha => this%alpha, beta => this%beta, &
371 alpha_d => this%alpha_d, beta_d => this%beta_d, &
372 p_d => this%p_d, q_d => this%q_d, r_d => this%r_d, &
373 s_d => this%s_d, u_d => this%u_d, u_d_d => this%u_d_d, &
374 w_d => this%w_d, z_d => this%z_d, mi_d => this%mi_d, ni_d => this%ni_d)
375
376 p_prev = device_pipecg_p_space !this%p_space
377 u_prev = device_pipecg_p_space + 1 !this%p_space+1
378 p_cur = 1
379 call device_rzero(x%x_d, n)
380 call device_rzero(z_d, n)
381 call device_rzero(q_d, n)
382 call device_rzero(p_d, n)
383 call device_rzero(s_d, n)
384 call device_copy(r_d, f_d, n)
385 !apply u=M^-1r
386 !call device_copy(u_d(u_prev), r_d, n)
387 call this%M%solve(u(1,u_prev), r, n)
388 call ax%compute(w, u(1,u_prev), coef, x%msh, x%Xh)
389 call gs_h%op(w, n, gs_op_add, this%gs_event)
390 call device_event_sync(this%gs_event)
391 call blst%apply_scalar(w, n)
392
393 rtr = device_glsc3(r_d, coef%mult_d, r_d, n)
394 rnorm = sqrt(rtr)*norm_fac
395 ksp_results%res_start = rnorm
396 ksp_results%res_final = rnorm
397 ksp_results%iter = 0
398 if(abscmp(rnorm, 0.0_rp)) then
399 ksp_results%converged = .true.
400 return
401 end if
402
403 gamma1 = 0.0_rp
404 tmp1 = 0.0_rp
405 tmp2 = 0.0_rp
406 tmp3 = 0.0_rp
407 tmp1 = device_vlsc3(r_d, coef%mult_d, u_d(u_prev), n)
408 tmp2 = device_vlsc3(w_d, coef%mult_d, u_d(u_prev), n)
409 tmp3 = device_vlsc3(r_d, coef%mult_d, r_d, n)
410 reduction(1) = tmp1
411 reduction(2) = tmp2
412 reduction(3) = tmp3
413
414 call this%monitor_start('PipeCG')
415 do iter = 1, max_iter
416 call mpi_iallreduce(mpi_in_place, reduction, 3, &
417 mpi_real_precision, mpi_sum, neko_comm, request, ierr)
418
419 call this%M%solve(mi, w, n)
420 call ax%compute(ni, mi, coef, x%msh, x%Xh)
421 call gs_h%op(ni, n, gs_op_add, this%gs_event)
422 call device_event_sync(this%gs_event)
423 call blst%apply(ni, n)
424
425 call mpi_wait(request, status, ierr)
426 gamma2 = gamma1
427 gamma1 = reduction(1)
428 delta = reduction(2)
429 rtr = reduction(3)
430
431 rnorm = sqrt(rtr)*norm_fac
432 call this%monitor_iter(iter, rnorm)
433 if (rnorm .lt. this%abs_tol) exit
434
435
436 if (iter .gt. 1) then
437 beta(p_cur) = gamma1 / gamma2
438 alpha(p_cur) = gamma1 / (delta - (beta(p_cur) * gamma1/alpha(p_prev)))
439 else
440 beta(p_cur) = 0.0_rp
441 alpha(p_cur) = gamma1/delta
442 end if
443
444 call device_pipecg_vecops(p_d, q_d, r_d,&
445 s_d, u_d(u_prev), u_d(p_cur),&
446 w_d, z_d, ni_d,&
447 mi_d, alpha(p_cur), beta(p_cur),&
448 coef%mult_d, reduction, n)
449 if (p_cur .eq. device_pipecg_p_space) then
450 call device_memcpy(alpha, alpha_d, p_cur, &
451 host_to_device, sync=.false.)
452 call device_memcpy(beta, beta_d, p_cur, &
453 host_to_device, sync=.false.)
454 call device_cg_update_xp(x%x_d, p_d, u_d_d, alpha_d, beta_d, p_cur, &
456 p_prev = p_cur
457 u_prev = device_pipecg_p_space + 1
458 alpha(1) = alpha(p_cur)
459 beta(1) = beta(p_cur)
460 p_cur = 1
461 else
462 u_prev = p_cur
463 p_prev = p_cur
464 p_cur = p_cur + 1
465 end if
466 end do
467
468 if ( p_cur .ne. 1) then
469 call device_memcpy(alpha, alpha_d, p_cur, host_to_device, sync=.false.)
470 call device_memcpy(beta, beta_d, p_cur, host_to_device, sync=.false.)
471 call device_cg_update_xp(x%x_d, p_d, u_d_d, alpha_d, beta_d, p_cur, &
473 end if
474 call this%monitor_stop()
475 ksp_results%res_final = rnorm
476 ksp_results%iter = iter
477 ksp_results%converged = this%is_converged(iter, rnorm)
478
479 end associate
480
481 end function pipecg_device_solve
482
484 function pipecg_device_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
485 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
486 class(pipecg_device_t), intent(inout) :: this
487 class(ax_t), intent(in) :: ax
488 type(field_t), intent(inout) :: x
489 type(field_t), intent(inout) :: y
490 type(field_t), intent(inout) :: z
491 integer, intent(in) :: n
492 real(kind=rp), dimension(n), intent(in) :: fx
493 real(kind=rp), dimension(n), intent(in) :: fy
494 real(kind=rp), dimension(n), intent(in) :: fz
495 type(coef_t), intent(inout) :: coef
496 type(bc_list_t), intent(inout) :: blstx
497 type(bc_list_t), intent(inout) :: blsty
498 type(bc_list_t), intent(inout) :: blstz
499 type(gs_t), intent(inout) :: gs_h
500 type(ksp_monitor_t), dimension(3) :: ksp_results
501 integer, optional, intent(in) :: niter
502
503 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
504 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
505 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
506
507 end function pipecg_device_solve_coupled
508
509end module pipecg_device
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Return the device pointer for an associated Fortran array.
Definition device.F90:108
Map a Fortran array to a device (allocate and associate)
Definition device.F90:78
Copy data between host and device (or device and device)
Definition device.F90:72
Unmap a Fortran array from a device (deassociate and free)
Definition device.F90:84
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:53
integer, public pe_size
MPI size of communicator.
Definition comm.F90:61
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:45
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n, strm)
Compute multiplication sum .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1594
integer, parameter, public host_to_device
Definition device.F90:48
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:240
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1550
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:209
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1516
Defines a field.
Definition field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition krylov.f90:51
Definition math.f90:60
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1285
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:289
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:233
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a pipelined Conjugate Gradient methods.
subroutine device_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction, n)
subroutine pipecg_device_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a pipelined PCG solver.
subroutine device_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n)
integer, parameter device_pipecg_p_space
type(ksp_monitor_t) function, dimension(3) pipecg_device_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Pipelined PCG coupled solve.
subroutine pipecg_device_free(this)
Deallocate a pipelined PCG solver.
type(ksp_monitor_t) function pipecg_device_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
Pipelined PCG solve.
Krylov preconditioner.
Definition precon.f90:34
Utilities.
Definition utils.f90:35
void hip_cg_update_xp(void *x, void *p, void *u, void *alpha, void *beta, int *p_cur, int *p_space, int *n)
void hip_pipecg_vecops(void *p, void *q, void *r, void *s, void *u1, void *u2, void *w, void *z, void *ni, void *mi, real *alpha, real *beta, void *mult, real *reduction, int *n)
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:49
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:63
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:73
Pipelined preconditioned conjugate gradient method.
Defines a canonical Krylov preconditioner.
Definition precon.f90:40