48 use,
intrinsic :: iso_c_binding, only : c_ptr, c_null_ptr, c_associated
54 real(kind=
rp),
allocatable :: d(:)
55 real(kind=
rp),
allocatable :: w(:)
56 real(kind=
rp),
allocatable :: r(:)
57 type(c_ptr) :: d_d = c_null_ptr
58 type(c_ptr) :: w_d = c_null_ptr
59 type(c_ptr) :: r_d = c_null_ptr
60 type(c_ptr) :: gs_event = c_null_ptr
61 real(kind=
rp) :: tha, dlt
62 integer :: power_its = 150
63 logical :: recompute_eigs = .true.
76 integer,
intent(in) :: max_iter
77 class(
pc_t),
optional,
intent(in),
target :: M
78 integer,
intent(in) :: n
79 real(kind=
rp),
optional,
intent(in) :: rel_tol
80 real(kind=
rp),
optional,
intent(in) :: abs_tol
81 logical,
optional,
intent(in) :: monitor
96 if (
present(rel_tol) .and.
present(abs_tol) .and.
present(monitor))
then
97 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
98 else if (
present(rel_tol) .and.
present(abs_tol))
then
99 call this%ksp_init(max_iter, rel_tol, abs_tol)
100 else if (
present(monitor) .and.
present(abs_tol))
then
101 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
102 else if (
present(rel_tol) .and.
present(monitor))
then
103 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
104 else if (
present(rel_tol))
then
105 call this%ksp_init(max_iter, rel_tol = rel_tol)
106 else if (
present(abs_tol))
then
107 call this%ksp_init(max_iter, abs_tol = abs_tol)
108 else if (
present(monitor))
then
109 call this%ksp_init(max_iter, monitor = monitor)
111 call this%ksp_init(max_iter)
123 if (
allocated(this%d))
then
127 if (
allocated(this%w))
then
131 if (
allocated(this%r))
then
137 if (c_associated(this%d_d))
then
141 if (c_associated(this%w_d))
then
145 if (c_associated(this%r_d))
then
149 if (c_associated(this%gs_event))
then
157 class(
ax_t),
intent(in) :: Ax
158 type(
field_t),
intent(inout) :: x
159 integer,
intent(in) :: n
160 type(
coef_t),
intent(inout) :: coef
162 type(
gs_t),
intent(inout) :: gs_h
163 real(kind=
rp) :: lam, b, a, rn
164 real(kind=
rp) :: boost = 1.2_rp
165 real(kind=
rp) :: lam_factor = 30.0_rp
166 real(kind=
rp) :: wtw, dtw, dtd
169 associate(w => this%w, w_d => this%w_d, d => this%d, d_d => this%d_d)
173 call random_number(rn)
178 call gs_h%op(d, n, gs_op_add, this%gs_event)
179 call blst%apply(d, n)
182 do i = 1, this%power_its
183 call ax%compute(w, d, coef, x%msh, x%Xh)
184 call gs_h%op(w, n, gs_op_add, this%gs_event)
185 call blst%apply(w, n)
189 call blst%apply(d, n)
192 call ax%compute(w, d, coef, x%msh, x%Xh)
193 call gs_h%op(w, n, gs_op_add, this%gs_event)
194 call blst%apply(w, n)
201 this%tha = (b+a)/2.0_rp
202 this%dlt = (b-a)/2.0_rp
204 this%recompute_eigs = .false.
212 class(ax_t),
intent(in) :: ax
213 type(field_t),
intent(inout) :: x
214 integer,
intent(in) :: n
215 real(kind=rp),
dimension(n),
intent(in) :: f
216 type(coef_t),
intent(inout) :: coef
217 type(bc_list_t),
intent(inout) :: blst
218 type(gs_t),
intent(inout) :: gs_h
219 type(ksp_monitor_t) :: ksp_results
220 integer,
optional,
intent(in) :: niter
221 integer :: iter, max_iter
222 real(kind=rp) :: a, b, rtr, rnorm, norm_fac
225 f_d = device_get_ptr(f)
227 if (this%recompute_eigs)
then
231 if (
present(niter))
then
234 max_iter = this%max_iter
236 norm_fac = 1.0_rp / sqrt(coef%volume)
238 associate( w => this%w, r => this%r, d => this%d, &
239 w_d => this%w_d, r_d => this%r_d, d_d => this%d_d)
241 call device_copy(r_d, f_d, n)
242 call ax%compute(w, x%x, coef, x%msh, x%Xh)
243 call gs_h%op(w, n, gs_op_add, this%gs_event)
244 call blst%apply(w, n)
245 call device_sub2(r_d, w_d, n)
247 rtr = device_glsc3(r_d, coef%mult_d, r_d, n)
248 rnorm = sqrt(rtr) * norm_fac
249 ksp_results%res_start = rnorm
250 ksp_results%res_final = rnorm
254 call this%M%solve(w, r, n)
255 call device_copy(d_d, w_d, n)
256 a = 2.0_rp / this%tha
257 call device_add2s2(x%x_d, d_d, a, n)
260 do iter = 2, max_iter
262 call device_copy(r_d, f_d, n)
263 call ax%compute(w, x%x, coef, x%msh, x%Xh)
264 call gs_h%op(w, n, gs_op_add, this%gs_event)
265 call blst%apply(w, n)
266 call device_sub2(r_d, w_d, n)
268 call this%M%solve(w, r, n)
270 if (iter .eq. 2)
then
271 b = 0.5_rp * (this%dlt * a)**2
273 b = (this%dlt * a / 2.0_rp)**2
275 a = 1.0_rp/(this%tha - b/a)
276 call device_add2s1(d_d, w_d, b, n)
278 call device_add2s2(x%x_d, d_d, a, n)
282 call device_copy(r_d, f_d, n)
283 call ax%compute(w, x%x, coef, x%msh, x%Xh)
284 call gs_h%op(w, n, gs_op_add, this%gs_event)
285 call blst%apply(w, n)
286 call device_sub2(r_d, w_d, n)
287 rtr = device_glsc3(r_d, coef%mult_d, r_d, n)
288 rnorm = sqrt(rtr) * norm_fac
291 ksp_results%res_final = rnorm
292 ksp_results%iter = iter
293 ksp_results%converged = this%is_converged(iter, rnorm)
299 n, coef, blstx, blsty, blstz, gs_h, niter)
result(ksp_results)
301 class(ax_t),
intent(in) :: ax
302 type(field_t),
intent(inout) :: x
303 type(field_t),
intent(inout) :: y
304 type(field_t),
intent(inout) :: z
305 integer,
intent(in) :: n
306 real(kind=rp),
dimension(n),
intent(in) :: fx
307 real(kind=rp),
dimension(n),
intent(in) :: fy
308 real(kind=rp),
dimension(n),
intent(in) :: fz
309 type(coef_t),
intent(inout) :: coef
310 type(bc_list_t),
intent(inout) :: blstx
311 type(bc_list_t),
intent(inout) :: blsty
312 type(bc_list_t),
intent(inout) :: blstz
313 type(gs_t),
intent(inout) :: gs_h
314 type(ksp_monitor_t),
dimension(3) :: ksp_results
315 integer,
optional,
intent(in) :: niter
317 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
318 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
319 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
Defines a Matrix-vector product.
Chebyshev preconditioner.
subroutine cheby_device_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a standard solver.
type(ksp_monitor_t) function, dimension(3) cheby_device_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard Cheby_Deviceshev coupled solve.
type(ksp_monitor_t) function cheby_device_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
A chebyshev preconditioner.
subroutine cheby_device_free(this)
subroutine cheby_device_power(this, ax, x, n, coef, blst, gs_h)
subroutine, public device_add2s1(a_d, b_d, c1, n)
subroutine, public device_add2s2(a_d, b_d, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_cmult2(a_d, b_d, c, n)
Multiplication by constant c .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
subroutine, public device_sub2(a_d, b_d, n)
Vector substraction .
Device abstraction, common interface for various accelerators.
integer, parameter, public host_to_device
subroutine, public device_free(x_d)
Deallocate memory on the device.
subroutine, public device_event_destroy(event)
Destroy a device event.
subroutine, public device_event_create(event, flags)
Create a device event queue.
Implements the base abstract type for Krylov solvers plus helper types.
integer, parameter, public rp
Global precision used in computations.
Defines a function space.
Base type for a matrix-vector product providing .
A list of allocatable `bc_t`. Follows the standard interface of lists.
Defines a Chebyshev preconditioner.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Type for storing initial and final residuals in a Krylov solver.
Base abstract type for a canonical Krylov method, solving .
Defines a canonical Krylov preconditioner.
The function space for the SEM solution fields.