53 real(kind=
rp),
allocatable :: d(:)
54 real(kind=
rp),
allocatable :: w(:)
55 real(kind=
rp),
allocatable :: r(:)
56 type(c_ptr) :: d_d = c_null_ptr
57 type(c_ptr) :: w_d = c_null_ptr
58 type(c_ptr) :: r_d = c_null_ptr
59 type(c_ptr) :: gs_event = c_null_ptr
60 real(kind=
rp) :: tha, dlt
61 integer :: power_its = 150
62 logical :: recompute_eigs = .true.
75 integer,
intent(in) :: max_iter
76 class(
pc_t),
optional,
intent(inout),
target :: M
77 integer,
intent(in) :: n
78 real(kind=
rp),
optional,
intent(inout) :: rel_tol
79 real(kind=
rp),
optional,
intent(inout) :: abs_tol
80 logical,
optional,
intent(in) :: monitor
95 if (
present(rel_tol) .and.
present(abs_tol) .and.
present(monitor))
then
96 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
97 else if (
present(rel_tol) .and.
present(abs_tol))
then
98 call this%ksp_init(max_iter, rel_tol, abs_tol)
99 else if (
present(monitor) .and.
present(abs_tol))
then
100 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
101 else if (
present(rel_tol) .and.
present(monitor))
then
102 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
103 else if (
present(rel_tol))
then
104 call this%ksp_init(max_iter, rel_tol = rel_tol)
105 else if (
present(abs_tol))
then
106 call this%ksp_init(max_iter, abs_tol = abs_tol)
107 else if (
present(monitor))
then
108 call this%ksp_init(max_iter, monitor = monitor)
110 call this%ksp_init(max_iter)
122 if (
allocated(this%d))
then
126 if (
allocated(this%w))
then
130 if (
allocated(this%r))
then
136 if (c_associated(this%d_d))
then
140 if (c_associated(this%w_d))
then
144 if (c_associated(this%r_d))
then
148 if (c_associated(this%gs_event))
then
156 class(
ax_t),
intent(inout) :: Ax
157 type(
field_t),
intent(inout) :: x
158 integer,
intent(in) :: n
159 type(
coef_t),
intent(inout) :: coef
161 type(
gs_t),
intent(inout) :: gs_h
162 real(kind=
rp) :: lam, b, a, rn
163 real(kind=
rp) :: boost = 1.2_rp
164 real(kind=
rp) :: lam_factor = 30.0_rp
165 real(kind=
rp) :: wtw, dtw, dtd
168 associate(w => this%w, w_d => this%w_d, d => this%d, d_d => this%d_d)
172 call random_number(rn)
177 call gs_h%op(d, n, gs_op_add, this%gs_event)
181 do i = 1, this%power_its
182 call ax%compute(w, d, coef, x%msh, x%Xh)
183 call gs_h%op(w, n, gs_op_add, this%gs_event)
191 call ax%compute(w, d, coef, x%msh, x%Xh)
192 call gs_h%op(w, n, gs_op_add, this%gs_event)
200 this%tha = (b+a)/2.0_rp
201 this%dlt = (b-a)/2.0_rp
203 this%recompute_eigs = .false.
211 class(ax_t),
intent(inout) :: ax
212 type(field_t),
intent(inout) :: x
213 integer,
intent(in) :: n
214 real(kind=rp),
dimension(n),
intent(inout) :: f
215 type(coef_t),
intent(inout) :: coef
216 type(bc_list_t),
intent(inout) :: blst
217 type(gs_t),
intent(inout) :: gs_h
218 type(ksp_monitor_t) :: ksp_results
219 integer,
optional,
intent(in) :: niter
220 integer :: iter, max_iter
221 real(kind=rp) :: a, b, rtr, rnorm, norm_fac
224 f_d = device_get_ptr(f)
226 if (this%recompute_eigs)
then
230 if (
present(niter))
then
233 max_iter = this%max_iter
235 norm_fac = 1.0_rp / sqrt(coef%volume)
237 associate( w => this%w, r => this%r, d => this%d, &
238 w_d => this%w_d, r_d => this%r_d, d_d => this%d_d)
240 call device_copy(r_d, f_d, n)
241 call ax%compute(w, x%x, coef, x%msh, x%Xh)
242 call gs_h%op(w, n, gs_op_add, this%gs_event)
243 call bc_list_apply(blst, w, n)
244 call device_sub2(r_d, w_d, n)
246 rtr = device_glsc3(r_d, coef%mult_d, r_d, n)
247 rnorm = sqrt(rtr) * norm_fac
248 ksp_results%res_start = rnorm
249 ksp_results%res_final = rnorm
253 call this%M%solve(w, r, n)
254 call device_copy(d_d, w_d, n)
255 a = 2.0_rp / this%tha
256 call device_add2s2(x%x_d, d_d, a, n)
259 do iter = 2, max_iter
261 call device_copy(r_d, f_d, n)
262 call ax%compute(w, x%x, coef, x%msh, x%Xh)
263 call gs_h%op(w, n, gs_op_add, this%gs_event)
264 call bc_list_apply(blst, w, n)
265 call device_sub2(r_d, w_d, n)
267 call this%M%solve(w, r, n)
269 if (iter .eq. 2)
then
270 b = 0.5_rp * (this%dlt * a)**2
272 b = (this%dlt * a / 2.0_rp)**2
274 a = 1.0_rp/(this%tha - b/a)
275 call device_add2s1(d_d, w_d, b, n)
277 call device_add2s2(x%x_d, d_d, a, n)
281 call device_copy(r_d, f_d, n)
282 call ax%compute(w, x%x, coef, x%msh, x%Xh)
283 call gs_h%op(w, n, gs_op_add, this%gs_event)
284 call bc_list_apply(blst, w, n)
285 call device_sub2(r_d, w_d, n)
286 rtr = device_glsc3(r_d, coef%mult_d, r_d, n)
287 rnorm = sqrt(rtr) * norm_fac
288 ksp_results%res_final = rnorm
289 ksp_results%iter = iter
295 n, coef, blstx, blsty, blstz, gs_h, niter)
result(ksp_results)
297 class(ax_t),
intent(inout) :: ax
298 type(field_t),
intent(inout) :: x
299 type(field_t),
intent(inout) :: y
300 type(field_t),
intent(inout) :: z
301 integer,
intent(in) :: n
302 real(kind=rp),
dimension(n),
intent(inout) :: fx
303 real(kind=rp),
dimension(n),
intent(inout) :: fy
304 real(kind=rp),
dimension(n),
intent(inout) :: fz
305 type(coef_t),
intent(inout) :: coef
306 type(bc_list_t),
intent(inout) :: blstx
307 type(bc_list_t),
intent(inout) :: blsty
308 type(bc_list_t),
intent(inout) :: blstz
309 type(gs_t),
intent(inout) :: gs_h
310 type(ksp_monitor_t),
dimension(3) :: ksp_results
311 integer,
optional,
intent(in) :: niter
313 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
314 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
315 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
Defines a Matrix-vector product.
Defines a boundary condition.
Chebyshev preconditioner.
subroutine cheby_device_power(this, Ax, x, n, coef, blst, gs_h)
subroutine cheby_device_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
Initialise a standard solver.
subroutine cheby_device_free(this)
type(ksp_monitor_t) function, dimension(3) cheby_device_solve_coupled(this, Ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard Cheby_Deviceshev coupled solve.
type(ksp_monitor_t) function cheby_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter)
A chebyshev preconditioner.
subroutine, public device_add2s1(a_d, b_d, c1, n)
subroutine, public device_add2s2(a_d, b_d, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_cmult2(a_d, b_d, c, n)
Multiplication by constant c .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
subroutine, public device_sub2(a_d, b_d, n)
Vector substraction .
Device abstraction, common interface for various accelerators.
integer, parameter, public host_to_device
subroutine, public device_free(x_d)
Deallocate memory on the device.
subroutine, public device_event_destroy(event)
Destroy a device event.
subroutine, public device_event_create(event, flags)
Create a device event queue.
Implements the base abstract type for Krylov solvers plus helper types.
integer, parameter, public rp
Global precision used in computations.
Defines a function space.
Base type for a matrix-vector product providing .
A list of boundary conditions.
Defines a Chebyshev preconditioner.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Type for storing initial and final residuals in a Krylov solver.
Base abstract type for a canonical Krylov method, solving .
Defines a canonical Krylov preconditioner.
The function space for the SEM solution fields.