50 use,
intrinsic :: iso_c_binding, only : c_ptr, c_int, &
51 c_null_ptr, c_associated
57 real(kind=
rp),
allocatable :: d(:)
58 real(kind=
rp),
allocatable :: w(:)
59 real(kind=
rp),
allocatable :: r(:)
60 type(c_ptr) :: d_d = c_null_ptr
61 type(c_ptr) :: w_d = c_null_ptr
62 type(c_ptr) :: r_d = c_null_ptr
63 type(c_ptr) :: gs_event = c_null_ptr
64 real(kind=
rp) :: tha, dlt
65 integer :: power_its = 150
66 logical :: recompute_eigs = .true.
67 logical :: zero_initial_guess = .false.
79 bind(c, name =
'hip_cheby_part1')
80 use,
intrinsic :: iso_c_binding
83 type(c_ptr),
value :: d_d, x_d, strm
91 bind(c, name =
'hip_cheby_part2')
92 use,
intrinsic :: iso_c_binding
95 type(c_ptr),
value :: d_d, w_d, x_d, strm
96 real(c_rp) :: tmp1, tmp2
102 subroutine cuda_cheby_device_part1(d_d, x_d, inv_tha, n, strm) &
103 bind(c, name =
'cuda_cheby_part1')
104 use,
intrinsic :: iso_c_binding
107 type(c_ptr),
value :: d_d, x_d, strm
108 real(c_rp) :: inv_tha
110 end subroutine cuda_cheby_device_part1
114 subroutine cuda_cheby_device_part2(d_d, w_d, x_d, tmp1, tmp2, n, strm) &
115 bind(c, name =
'cuda_cheby_part2')
116 use,
intrinsic :: iso_c_binding
119 type(c_ptr),
value :: d_d, w_d, x_d, strm
120 real(c_rp) :: tmp1, tmp2
122 end subroutine cuda_cheby_device_part2
128 type(c_ptr) :: d_d, x_d
129 real(c_rp) :: inv_tha
134 call cuda_cheby_device_part1(d_d, x_d, inv_tha, n,
glb_cmd_queue)
135#else !Fallback to device_math for missing device kernels
144 type(c_ptr) :: d_d, w_d, x_d
145 real(c_rp) :: tmp1, tmp2
150 call cuda_cheby_device_part2(d_d, w_d, x_d, tmp1, tmp2, n,
glb_cmd_queue)
151#else !Fallback to device_math for missing device kernels
161 integer,
intent(in) :: max_iter
162 class(
pc_t),
optional,
intent(in),
target :: M
163 integer,
intent(in) :: n
164 real(kind=
rp),
optional,
intent(in) :: rel_tol
165 real(kind=
rp),
optional,
intent(in) :: abs_tol
166 logical,
optional,
intent(in) :: monitor
181 if (
present(rel_tol) .and.
present(abs_tol) .and.
present(monitor))
then
182 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
183 else if (
present(rel_tol) .and.
present(abs_tol))
then
184 call this%ksp_init(max_iter, rel_tol, abs_tol)
185 else if (
present(monitor) .and.
present(abs_tol))
then
186 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
187 else if (
present(rel_tol) .and.
present(monitor))
then
188 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
189 else if (
present(rel_tol))
then
190 call this%ksp_init(max_iter, rel_tol = rel_tol)
191 else if (
present(abs_tol))
then
192 call this%ksp_init(max_iter, abs_tol = abs_tol)
193 else if (
present(monitor))
then
194 call this%ksp_init(max_iter, monitor = monitor)
196 call this%ksp_init(max_iter)
208 if (
allocated(this%d))
then
212 if (
allocated(this%w))
then
216 if (
allocated(this%r))
then
222 if (c_associated(this%d_d))
then
226 if (c_associated(this%w_d))
then
230 if (c_associated(this%r_d))
then
234 if (c_associated(this%gs_event))
then
242 class(
ax_t),
intent(in) :: Ax
243 type(
field_t),
intent(inout) :: x
244 integer,
intent(in) :: n
245 type(
coef_t),
intent(inout) :: coef
247 type(
gs_t),
intent(inout) :: gs_h
248 real(kind=
rp) :: lam, b, a, rn
249 real(kind=
rp) :: boost = 1.1_rp
250 real(kind=
rp) :: lam_factor = 30.0_rp
251 real(kind=
rp) :: wtw, dtw, dtd
254 associate(w => this%w, w_d => this%w_d, d => this%d, d_d => this%d_d)
258 call random_number(rn)
263 call gs_h%op(d, n, gs_op_add, this%gs_event)
264 call blst%apply(d, n)
267 do i = 1, this%power_its
268 call ax%compute(w, d, coef, x%msh, x%Xh)
269 call gs_h%op(w, n, gs_op_add, this%gs_event)
270 call blst%apply(w, n)
271 if (
associated(this%schwarz))
then
272 call this%schwarz%compute(this%r, w)
275 call this%M%solve(this%r, w, n)
281 call blst%apply(d, n)
284 call ax%compute(w, d, coef, x%msh, x%Xh)
285 call gs_h%op(w, n, gs_op_add, this%gs_event)
286 call blst%apply(w, n)
287 if (
associated(this%schwarz))
then
288 call this%schwarz%compute(this%r, w)
291 call this%M%solve(this%r, w, n)
300 this%tha = (b+a)/2.0_rp
301 this%dlt = (b-a)/2.0_rp
303 this%recompute_eigs = .false.
311 class(ax_t),
intent(in) :: ax
312 type(field_t),
intent(inout) :: x
313 integer,
intent(in) :: n
314 real(kind=rp),
dimension(n),
intent(in) :: f
315 type(coef_t),
intent(inout) :: coef
316 type(bc_list_t),
intent(inout) :: blst
317 type(gs_t),
intent(inout) :: gs_h
318 type(ksp_monitor_t) :: ksp_results
319 integer,
optional,
intent(in) :: niter
320 integer :: iter, max_iter
321 real(kind=rp) :: a, b, rtr, rnorm, norm_fac
324 f_d = device_get_ptr(f)
326 if (this%recompute_eigs)
then
330 if (
present(niter))
then
333 max_iter = this%max_iter
335 norm_fac = 1.0_rp / sqrt(coef%volume)
337 associate( w => this%w, r => this%r, d => this%d, &
338 w_d => this%w_d, r_d => this%r_d, d_d => this%d_d)
340 call device_copy(r_d, f_d, n)
341 call ax%compute(w, x%x, coef, x%msh, x%Xh)
342 call gs_h%op(w, n, gs_op_add, this%gs_event)
343 call blst%apply(w, n)
344 call device_sub2(r_d, w_d, n)
346 rtr = device_glsc3(r_d, coef%mult_d, r_d, n)
347 rnorm = sqrt(rtr) * norm_fac
348 ksp_results%res_start = rnorm
349 ksp_results%res_final = rnorm
353 call this%M%solve(w, r, n)
354 call device_copy(d_d, w_d, n)
355 a = 2.0_rp / this%tha
356 call device_add2s2(x%x_d, d_d, a, n)
359 do iter = 2, max_iter
361 call device_copy(r_d, f_d, n)
362 call ax%compute(w, x%x, coef, x%msh, x%Xh)
363 call gs_h%op(w, n, gs_op_add, this%gs_event)
364 call blst%apply(w, n)
365 call device_sub2(r_d, w_d, n)
367 call this%M%solve(w, r, n)
369 if (iter .eq. 2)
then
370 b = 0.5_rp * (this%dlt * a)**2
372 b = (this%dlt * a / 2.0_rp)**2
374 a = 1.0_rp/(this%tha - b/a)
375 call device_add2s1(d_d, w_d, b, n)
377 call device_add2s2(x%x_d, d_d, a, n)
381 call device_copy(r_d, f_d, n)
382 call ax%compute(w, x%x, coef, x%msh, x%Xh)
383 call gs_h%op(w, n, gs_op_add, this%gs_event)
384 call blst%apply(w, n)
385 call device_sub2(r_d, w_d, n)
386 rtr = device_glsc3(r_d, coef%mult_d, r_d, n)
387 rnorm = sqrt(rtr) * norm_fac
390 ksp_results%res_final = rnorm
391 ksp_results%iter = iter
392 ksp_results%converged = this%is_converged(iter, rnorm)
400 class(ax_t),
intent(in) :: ax
401 type(field_t),
intent(inout) :: x
402 integer,
intent(in) :: n
403 real(kind=rp),
dimension(n),
intent(in) :: f
404 type(coef_t),
intent(inout) :: coef
405 type(bc_list_t),
intent(inout) :: blst
406 type(gs_t),
intent(inout) :: gs_h
407 type(ksp_monitor_t) :: ksp_results
408 integer,
optional,
intent(in) :: niter
409 integer :: iter, max_iter
410 real(kind=rp) :: a, b, rtr, rnorm, norm_fac
411 real(kind=rp) :: rhok, rhokp1, sig1, tmp1, tmp2
414 f_d = device_get_ptr(f)
416 if (this%recompute_eigs)
then
420 if (
present(niter))
then
423 max_iter = this%max_iter
425 norm_fac = 1.0_rp / sqrt(coef%volume)
427 associate( w => this%w, r => this%r, d => this%d, &
428 w_d => this%w_d, r_d => this%r_d, d_d => this%d_d)
430 if (.not.this%zero_initial_guess)
then
431 call ax%compute(w, x%x, coef, x%msh, x%Xh)
432 call gs_h%op(w, n, gs_op_add, this%gs_event)
433 call blst%apply(w, n)
434 call device_sub3(r_d, f_d, w_d, n)
436 call device_copy(r_d, f_d, n)
437 this%zero_initial_guess = .false.
441 if (
associated(this%schwarz))
then
442 call this%schwarz%compute(d, r)
444 call this%M%solve(d, r, n)
447 tmp1 = 1.0_rp / this%tha
450 sig1 = this%tha / this%dlt
454 do iter = 2, max_iter
455 rhokp1 = 1.0_rp / (2.0_rp * sig1 - rhok)
457 tmp2 = 2.0_rp * rhokp1 / this%dlt
460 call ax%compute(w, x%x, coef, x%msh, x%Xh)
461 call gs_h%op(w, n, gs_op_add, this%gs_event)
462 call blst%apply(w, n)
463 call device_sub3(r_d, f_d, w_d, n)
465 if (
associated(this%schwarz))
then
466 call this%schwarz%compute(w, r)
468 call this%M%solve(w, r, n)
480 n, coef, blstx, blsty, blstz, gs_h, niter)
result(ksp_results)
482 class(ax_t),
intent(in) :: ax
483 type(field_t),
intent(inout) :: x
484 type(field_t),
intent(inout) :: y
485 type(field_t),
intent(inout) :: z
486 integer,
intent(in) :: n
487 real(kind=rp),
dimension(n),
intent(in) :: fx
488 real(kind=rp),
dimension(n),
intent(in) :: fy
489 real(kind=rp),
dimension(n),
intent(in) :: fz
490 type(coef_t),
intent(inout) :: coef
491 type(bc_list_t),
intent(inout) :: blstx
492 type(bc_list_t),
intent(inout) :: blsty
493 type(bc_list_t),
intent(inout) :: blstz
494 type(gs_t),
intent(inout) :: gs_h
495 type(ksp_monitor_t),
dimension(3) :: ksp_results
496 integer,
optional,
intent(in) :: niter
498 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
499 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
500 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
Defines a Matrix-vector product.
Chebyshev preconditioner.
subroutine cheby_device_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a standard solver.
subroutine cheby_device_part2(d_d, w_d, x_d, tmp1, tmp2, n)
type(ksp_monitor_t) function, dimension(3) cheby_device_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard Cheby_Deviceshev coupled solve.
type(ksp_monitor_t) function cheby_device_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
A chebyshev preconditioner.
subroutine cheby_device_free(this)
subroutine cheby_device_part1(d_d, x_d, inv_tha, n)
subroutine cheby_device_power(this, ax, x, n, coef, blst, gs_h)
type(ksp_monitor_t) function cheby_device_impl(this, ax, x, f, n, coef, blst, gs_h, niter)
A chebyshev preconditioner.
subroutine, public device_add2s1(a_d, b_d, c1, n, strm)
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction .
subroutine, public device_add2s2(a_d, b_d, c1, n, strm)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_sub2(a_d, b_d, n, strm)
Vector substraction .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
subroutine, public device_cmult2(a_d, b_d, c, n, strm)
Multiplication by constant c .
Device abstraction, common interface for various accelerators.
integer, parameter, public host_to_device
subroutine, public device_free(x_d)
Deallocate memory on the device.
subroutine, public device_event_destroy(event)
Destroy a device event.
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
subroutine, public device_event_create(event, flags)
Create a device event queue.
Implements the base abstract type for Krylov solvers plus helper types.
integer, parameter, public c_rp
integer, parameter, public rp
Global precision used in computations.
Overlapping schwarz solves.
Defines a function space.
Base type for a matrix-vector product providing .
A list of allocatable `bc_t`. Follows the standard interface of lists.
Defines a Chebyshev preconditioner.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Type for storing initial and final residuals in a Krylov solver.
Base abstract type for a canonical Krylov method, solving .
Defines a canonical Krylov preconditioner.
The function space for the SEM solution fields.