52 real(kind=
rp),
allocatable :: r(:)
53 real(kind=
rp),
allocatable :: p(:)
54 real(kind=
rp),
allocatable :: pr(:,:)
65 subroutine cacg_init(this, n, max_iter, M, s, rel_tol, abs_tol)
66 class(
cacg_t),
intent(inout) :: this
67 class(
pc_t),
optional,
intent(inout),
target :: M
68 integer,
intent(in) :: n
69 integer,
intent(in) :: max_iter
70 real(kind=
rp),
optional,
intent(inout) :: rel_tol
71 real(kind=
rp),
optional,
intent(inout) :: abs_tol
72 integer,
optional,
intent(inout) :: s
81 call neko_warning(
"Communication Avoiding CG chosen, be aware of potential instabilities")
86 allocate(this%PR(n,4*this%s+1))
91 if (
present(rel_tol) .and.
present(abs_tol))
then
92 call this%ksp_init(max_iter, rel_tol, abs_tol)
93 else if (
present(rel_tol))
then
94 call this%ksp_init(max_iter, rel_tol=rel_tol)
95 else if (
present(abs_tol))
then
96 call this%ksp_init(max_iter, abs_tol=abs_tol)
98 call this%ksp_init(max_iter)
105 class(
cacg_t),
intent(inout) :: this
109 if (
allocated(this%PR))
then
113 if (
allocated(this%r))
then
117 if (
allocated(this%p))
then
127 function cacg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter)
result(ksp_results)
128 class(
cacg_t),
intent(inout) :: this
129 class(
ax_t),
intent(inout) :: ax
130 type(
field_t),
intent(inout) :: x
131 integer,
intent(in) :: n
132 real(kind=
rp),
dimension(n),
intent(inout) :: f
133 type(
coef_t),
intent(inout) :: coef
135 type(
gs_t),
intent(inout) :: gs_h
137 integer,
optional,
intent(in) :: niter
138 integer :: i, j, k, l, iter, max_iter, s, ierr, it
139 real(kind=
rp) :: rnorm, rtr, rtz1, tmp
140 real(kind=
rp) :: beta(this%s+1), alpha(this%s+1), alpha1, alpha2, norm_fac
141 real(kind=
rp),
dimension(4*this%s+1,4*this%s+1) :: tt, g, gtt, temp, temp2
142 real(kind=
rp) :: p_c(4*this%s+1,this%s+1)
143 real(kind=
rp) :: r_c(4*this%s+1,this%s+1)
144 real(kind=
rp) :: z_c(4*this%s+1,this%s+1)
145 real(kind=
rp) :: x_c(4*this%s+1,this%s+1)
147 associate(pr => this%PR, r => this%r, p => this%p)
149 if (
present(niter))
then
152 max_iter = this%max_iter
154 norm_fac = 1.0_rp / sqrt(coef%volume)
159 call this%M%solve(p, r, n)
161 rtr =
glsc3(r, coef%mult, r, n)
162 rnorm = sqrt(rtr)*norm_fac
163 ksp_results%res_start = rnorm
164 ksp_results%res_final = rnorm
167 if(
abscmp(rnorm, 0.0_rp))
return
168 do while (iter < max_iter)
171 call copy(pr(1,2*s+2), r, n)
175 if (mod(i,2) .eq. 0)
then
176 call ax%compute(pr(1,i), pr(1,i-1), coef, x%msh, x%Xh)
177 call gs_h%gs_op_vector(pr(1,i), n, gs_op_add)
180 call this%M%solve(pr(1,i), pr(1,i-1), n)
185 if (mod(i,2) == 0)
then
186 call this%M%solve(pr(1,i+1), pr(1,i), n)
188 call ax%compute(pr(1,i+1), pr(1,i), coef, x%msh, x%Xh)
189 call gs_h%gs_op_vector(pr(1,i+1), n, gs_op_add)
195 call rzero(p_c, (4*s+1) * (s+1))
197 call rzero(r_c, (4*s+1) * (s+1))
198 r_c(2*s+2,1) = 1.0_rp
199 call mxm(tt, 4*s+1, r_c, 4*s+1, z_c,s+1)
200 call rzero(x_c, (4*s+1) * (s+1))
201 call rzero(temp, (4*s+1)**2)
203 do i = 0, n, neko_blk_size
205 if (i + neko_blk_size .le. n)
then
209 do k = 1, neko_blk_size
210 temp(it,1) = temp(it,1) &
211 + pr(i+k,j) * pr(i+k,l) * coef%mult(i+k,1,1,1)
220 temp(it,1) = temp(it,1) &
221 + pr(i+k,j) * pr(i+k,l) * coef%mult(i+k,1,1,1)
228 call mpi_allreduce(temp, temp2, it, &
239 call mxm(g,4*s+1, tt, 4*s+1,gtt,4*s+1)
244 call mxm(g, 4*s+1, r_c(1,j), 4*s+1,temp, 1)
245 call mxm(gtt, 4*s+1, p_c(1,j), 4*s+1,temp2, 1)
249 alpha1 = alpha1 + temp(i,1) * z_c(i,j)
250 alpha2 = alpha2 + temp2(i,1) * p_c(i,j)
252 alpha(j) = alpha1/alpha2
255 x_c(i,j+1) = x_c(i,j) + alpha(j) * p_c(i,j)
258 tmp = tmp + tt(i,k) * p_c(k,j)
260 r_c(i,j+1) = r_c(i,j) - alpha(j)*tmp
263 tmp = tmp + tt(i,k)*r_c(k,j+1)
268 call mxm(g,4*s+1,r_c(1,j+1),4*s+1,temp2,1)
271 alpha2 = alpha2 + temp2(i,1)*z_c(i,j+1)
273 beta(j) = alpha2 / alpha1
275 p_c(i,j+1) = z_c(i,j+1) + beta(j)*p_c(i,j)
282 do i = 0, n, neko_blk_size
283 if (i + neko_blk_size .le. n)
then
285 do k = 1, neko_blk_size
286 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + pr(i+k,j) * x_c(j,s+1)
287 p(i+k) = p(i+k) + pr(i+k,j) * p_c(j,s+1)
288 tmp = pr(i+k,j) * r_c(j,s+1)
289 r(i+k) = r(i+k) + tmp
292 do k = 1, neko_blk_size
293 rtr = rtr + r(i+k)**2 * coef%mult(i+k,1,1,1)
298 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + pr(i+k,j) * x_c(j,s+1)
299 p(i+k) = p(i+k) + pr(i+k,j) * p_c(j,s+1)
300 tmp = pr(i+k,j) * r_c(j,s+1)
301 r(i+k) = r(i+k) + tmp
305 rtr = rtr + r(i+k)**2 * coef%mult(i+k,1,1,1)
310 call mpi_allreduce(rtr, tmp, 1, &
312 rnorm = norm_fac*sqrt(tmp)
313 if( rnorm <= this%abs_tol)
exit
316 ksp_results%res_final = rnorm
317 ksp_results%iter = iter
325 integer,
intent(in) :: s
326 real(kind=rp),
intent(inout) :: tt(4*s+1,4*s+1)
328 mlen = (4*s+1)*(4*s+1)
334 tt(2*s+2+i,2*s+1+i) = 1.0_rp
Defines a Matrix-vector product.
Defines a boundary condition.
subroutine, public bc_list_apply_scalar(bclst, x, n, t, tstep)
Apply a list of boundary conditions to a scalar field.
Defines a communication avoiding Conjugate Gradient method.
subroutine cacg_init(this, n, max_iter, M, s, rel_tol, abs_tol)
Initialise a s-step CA PCG solver.
subroutine construct_basis_matrix(Tt, s)
Monomial matrix constuction, not sparse.
type(ksp_monitor_t) function cacg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter)
S-step CA PCG solve.
subroutine cacg_free(this)
Deallocate a s-step CA PCG solver.
type(mpi_comm) neko_comm
MPI communicator.
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
Implements the base abstract type for Krylov solvers plus helper types.
integer, parameter, public ksp_max_iter
Maximum number of iters.
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
subroutine, public x_update(a, b, c, c1, c2, n)
Returns .
subroutine, public copy(a, b, n)
Copy a vector .
subroutine, public rzero(a, n)
Zero a real vector.
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
integer, parameter, public rp
Global precision used in computations.
subroutine neko_warning(warning_msg)
Base type for a matrix-vector product providing .
A list of boundary conditions.
S-step communication avoiding preconditioned conjugate gradient method.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Type for storing initial and final residuals in a Krylov solver.
Base abstract type for a canonical Krylov method, solving .
Defines a canonical Krylov preconditioner.