50 use,
intrinsic :: iso_c_binding, only : c_ptr, c_null_ptr, c_associated
56 real(kind=
rp),
allocatable :: d(:)
57 real(kind=
rp),
allocatable :: w(:)
58 real(kind=
rp),
allocatable :: r(:)
59 type(c_ptr) :: d_d = c_null_ptr
60 type(c_ptr) :: w_d = c_null_ptr
61 type(c_ptr) :: r_d = c_null_ptr
62 type(c_ptr) :: gs_event = c_null_ptr
63 real(kind=
rp) :: tha, dlt
64 integer :: power_its = 150
65 logical :: recompute_eigs = .true.
66 logical :: zero_initial_guess = .false.
80 integer,
intent(in) :: max_iter
81 class(
pc_t),
optional,
intent(in),
target :: M
82 integer,
intent(in) :: n
83 real(kind=
rp),
optional,
intent(in) :: rel_tol
84 real(kind=
rp),
optional,
intent(in) :: abs_tol
85 logical,
optional,
intent(in) :: monitor
100 if (
present(rel_tol) .and.
present(abs_tol) .and.
present(monitor))
then
101 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
102 else if (
present(rel_tol) .and.
present(abs_tol))
then
103 call this%ksp_init(max_iter, rel_tol, abs_tol)
104 else if (
present(monitor) .and.
present(abs_tol))
then
105 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
106 else if (
present(rel_tol) .and.
present(monitor))
then
107 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
108 else if (
present(rel_tol))
then
109 call this%ksp_init(max_iter, rel_tol = rel_tol)
110 else if (
present(abs_tol))
then
111 call this%ksp_init(max_iter, abs_tol = abs_tol)
112 else if (
present(monitor))
then
113 call this%ksp_init(max_iter, monitor = monitor)
115 call this%ksp_init(max_iter)
127 if (
allocated(this%d))
then
131 if (
allocated(this%w))
then
135 if (
allocated(this%r))
then
141 if (c_associated(this%d_d))
then
145 if (c_associated(this%w_d))
then
149 if (c_associated(this%r_d))
then
153 if (c_associated(this%gs_event))
then
161 class(
ax_t),
intent(in) :: Ax
162 type(
field_t),
intent(inout) :: x
163 integer,
intent(in) :: n
164 type(
coef_t),
intent(inout) :: coef
166 type(
gs_t),
intent(inout) :: gs_h
167 real(kind=
rp) :: lam, b, a, rn
168 real(kind=
rp) :: boost = 1.1_rp
169 real(kind=
rp) :: lam_factor = 30.0_rp
170 real(kind=
rp) :: wtw, dtw, dtd
173 associate(w => this%w, w_d => this%w_d, d => this%d, d_d => this%d_d)
177 call random_number(rn)
182 call gs_h%op(d, n, gs_op_add, this%gs_event)
183 call blst%apply(d, n)
186 do i = 1, this%power_its
187 call ax%compute(w, d, coef, x%msh, x%Xh)
188 call gs_h%op(w, n, gs_op_add, this%gs_event)
189 call blst%apply(w, n)
190 if (
associated(this%schwarz))
then
191 call this%schwarz%compute(this%r, w)
194 call this%M%solve(this%r, w, n)
200 call blst%apply(d, n)
203 call ax%compute(w, d, coef, x%msh, x%Xh)
204 call gs_h%op(w, n, gs_op_add, this%gs_event)
205 call blst%apply(w, n)
206 if (
associated(this%schwarz))
then
207 call this%schwarz%compute(this%r, w)
210 call this%M%solve(this%r, w, n)
219 this%tha = (b+a)/2.0_rp
220 this%dlt = (b-a)/2.0_rp
222 this%recompute_eigs = .false.
230 class(ax_t),
intent(in) :: ax
231 type(field_t),
intent(inout) :: x
232 integer,
intent(in) :: n
233 real(kind=rp),
dimension(n),
intent(in) :: f
234 type(coef_t),
intent(inout) :: coef
235 type(bc_list_t),
intent(inout) :: blst
236 type(gs_t),
intent(inout) :: gs_h
237 type(ksp_monitor_t) :: ksp_results
238 integer,
optional,
intent(in) :: niter
239 integer :: iter, max_iter
240 real(kind=rp) :: a, b, rtr, rnorm, norm_fac
243 f_d = device_get_ptr(f)
245 if (this%recompute_eigs)
then
249 if (
present(niter))
then
252 max_iter = this%max_iter
254 norm_fac = 1.0_rp / sqrt(coef%volume)
256 associate( w => this%w, r => this%r, d => this%d, &
257 w_d => this%w_d, r_d => this%r_d, d_d => this%d_d)
259 call device_copy(r_d, f_d, n)
260 call ax%compute(w, x%x, coef, x%msh, x%Xh)
261 call gs_h%op(w, n, gs_op_add, this%gs_event)
262 call blst%apply(w, n)
263 call device_sub2(r_d, w_d, n)
265 rtr = device_glsc3(r_d, coef%mult_d, r_d, n)
266 rnorm = sqrt(rtr) * norm_fac
267 ksp_results%res_start = rnorm
268 ksp_results%res_final = rnorm
272 call this%M%solve(w, r, n)
273 call device_copy(d_d, w_d, n)
274 a = 2.0_rp / this%tha
275 call device_add2s2(x%x_d, d_d, a, n)
278 do iter = 2, max_iter
280 call device_copy(r_d, f_d, n)
281 call ax%compute(w, x%x, coef, x%msh, x%Xh)
282 call gs_h%op(w, n, gs_op_add, this%gs_event)
283 call blst%apply(w, n)
284 call device_sub2(r_d, w_d, n)
286 call this%M%solve(w, r, n)
288 if (iter .eq. 2)
then
289 b = 0.5_rp * (this%dlt * a)**2
291 b = (this%dlt * a / 2.0_rp)**2
293 a = 1.0_rp/(this%tha - b/a)
294 call device_add2s1(d_d, w_d, b, n)
296 call device_add2s2(x%x_d, d_d, a, n)
300 call device_copy(r_d, f_d, n)
301 call ax%compute(w, x%x, coef, x%msh, x%Xh)
302 call gs_h%op(w, n, gs_op_add, this%gs_event)
303 call blst%apply(w, n)
304 call device_sub2(r_d, w_d, n)
305 rtr = device_glsc3(r_d, coef%mult_d, r_d, n)
306 rnorm = sqrt(rtr) * norm_fac
309 ksp_results%res_final = rnorm
310 ksp_results%iter = iter
311 ksp_results%converged = this%is_converged(iter, rnorm)
319 class(ax_t),
intent(in) :: ax
320 type(field_t),
intent(inout) :: x
321 integer,
intent(in) :: n
322 real(kind=rp),
dimension(n),
intent(in) :: f
323 type(coef_t),
intent(inout) :: coef
324 type(bc_list_t),
intent(inout) :: blst
325 type(gs_t),
intent(inout) :: gs_h
326 type(ksp_monitor_t) :: ksp_results
327 integer,
optional,
intent(in) :: niter
328 integer :: iter, max_iter
329 real(kind=rp) :: a, b, rtr, rnorm, norm_fac
330 real(kind=rp) :: rhok, rhokp1, sig1, tmp1, tmp2
333 f_d = device_get_ptr(f)
335 if (this%recompute_eigs)
then
339 if (
present(niter))
then
342 max_iter = this%max_iter
344 norm_fac = 1.0_rp / sqrt(coef%volume)
346 associate( w => this%w, r => this%r, d => this%d, &
347 w_d => this%w_d, r_d => this%r_d, d_d => this%d_d)
349 if (.not.this%zero_initial_guess)
then
350 call ax%compute(w, x%x, coef, x%msh, x%Xh)
351 call gs_h%op(w, n, gs_op_add, this%gs_event)
352 call blst%apply(w, n)
353 call device_sub3(r_d, f_d, w_d, n)
355 call device_copy(r_d, f_d, n)
356 this%zero_initial_guess = .false.
360 if (
associated(this%schwarz))
then
361 call this%schwarz%compute(d, r)
363 call this%M%solve(d, r, n)
365 call device_cmult( d_d, (1.0_rp / this%tha), n)
366 call device_add2( x%x_d, d_d, n)
368 sig1 = this%tha / this%dlt
372 do iter = 2, max_iter
373 rhokp1 = 1.0_rp / (2.0_rp * sig1 - rhok)
375 tmp2 = 2.0_rp * rhokp1 / this%dlt
378 call ax%compute(w, x%x, coef, x%msh, x%Xh)
379 call gs_h%op(w, n, gs_op_add, this%gs_event)
380 call blst%apply(w, n)
381 call device_sub3(r_d, f_d, w_d, n)
383 if (
associated(this%schwarz))
then
384 call this%schwarz%compute(w, r)
386 call this%M%solve(w, r, n)
388 call device_cmult( d_d, tmp1, n)
389 call device_add2s2( d_d, w_d, tmp2, n)
390 call device_add2( x%x_d, d_d, n)
398 n, coef, blstx, blsty, blstz, gs_h, niter)
result(ksp_results)
400 class(ax_t),
intent(in) :: ax
401 type(field_t),
intent(inout) :: x
402 type(field_t),
intent(inout) :: y
403 type(field_t),
intent(inout) :: z
404 integer,
intent(in) :: n
405 real(kind=rp),
dimension(n),
intent(in) :: fx
406 real(kind=rp),
dimension(n),
intent(in) :: fy
407 real(kind=rp),
dimension(n),
intent(in) :: fz
408 type(coef_t),
intent(inout) :: coef
409 type(bc_list_t),
intent(inout) :: blstx
410 type(bc_list_t),
intent(inout) :: blsty
411 type(bc_list_t),
intent(inout) :: blstz
412 type(gs_t),
intent(inout) :: gs_h
413 type(ksp_monitor_t),
dimension(3) :: ksp_results
414 integer,
optional,
intent(in) :: niter
416 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
417 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
418 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
Defines a Matrix-vector product.
Chebyshev preconditioner.
subroutine cheby_device_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a standard solver.
type(ksp_monitor_t) function, dimension(3) cheby_device_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard Cheby_Deviceshev coupled solve.
type(ksp_monitor_t) function cheby_device_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
A chebyshev preconditioner.
subroutine cheby_device_free(this)
subroutine cheby_device_power(this, ax, x, n, coef, blst, gs_h)
type(ksp_monitor_t) function cheby_device_impl(this, ax, x, f, n, coef, blst, gs_h, niter)
A chebyshev preconditioner.
subroutine, public device_add2s1(a_d, b_d, c1, n, strm)
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction .
subroutine, public device_add2s2(a_d, b_d, c1, n, strm)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_sub2(a_d, b_d, n, strm)
Vector substraction .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
subroutine, public device_cmult2(a_d, b_d, c, n, strm)
Multiplication by constant c .
Device abstraction, common interface for various accelerators.
integer, parameter, public host_to_device
subroutine, public device_free(x_d)
Deallocate memory on the device.
subroutine, public device_event_destroy(event)
Destroy a device event.
subroutine, public device_event_create(event, flags)
Create a device event queue.
Implements the base abstract type for Krylov solvers plus helper types.
integer, parameter, public rp
Global precision used in computations.
Overlapping schwarz solves.
Defines a function space.
Base type for a matrix-vector product providing .
A list of allocatable `bc_t`. Follows the standard interface of lists.
Defines a Chebyshev preconditioner.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Type for storing initial and final residuals in a Krylov solver.
Base abstract type for a canonical Krylov method, solving .
Defines a canonical Krylov preconditioner.
The function space for the SEM solution fields.