Loading [MathJax]/extensions/tex2jax.js
Neko 0.9.99
A portable framework for high-order spectral element flow simulations
All Classes Namespaces Files Functions Variables Typedefs Enumerator Macros Pages
cacg.f90
Go to the documentation of this file.
1! Copyright (c) 2021-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module cacg
35 use num_types, only: rp
37 use precon, only : pc_t
38 use ax_product, only : ax_t
39 use field, only : field_t
40 use coefs, only : coef_t
41 use gather_scatter, only : gs_t, gs_op_add
42 use bc_list, only : bc_list_t
43 use math, only : glsc3, rzero, copy, x_update, abscmp
44 use utils, only : neko_warning
45 use comm
46 use mxm_wrapper
47 implicit none
48 private
49
51 type, public, extends(ksp_t) :: cacg_t
52 real(kind=rp), allocatable :: r(:)
53 real(kind=rp), allocatable :: p(:)
54 real(kind=rp), allocatable :: pr(:,:)
55 integer :: s = 4
56 contains
57 procedure, pass(this) :: init => cacg_init
58 procedure, pass(this) :: free => cacg_free
59 procedure, pass(this) :: solve => cacg_solve
60 procedure, pass(this) :: solve_coupled => cacg_solve_coupled
61 end type cacg_t
62
63contains
64
66 subroutine cacg_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
67 class(cacg_t), target, intent(inout) :: this
68 class(pc_t), optional, intent(in), target :: M
69 integer, intent(in) :: n
70 integer, intent(in) :: max_iter
71 real(kind=rp), optional, intent(in) :: rel_tol
72 real(kind=rp), optional, intent(in) :: abs_tol
73 logical, optional, intent(in) :: monitor
74 call this%free()
75
76 if (pe_rank .eq. 0) then
77 call neko_warning("Communication Avoiding CG chosen,&
78 & be aware of potential instabilities")
79 end if
80
81 allocate(this%r(n))
82 allocate(this%p(n))
83 allocate(this%PR(n,4*this%s+1))
84 if (present(m)) then
85 this%M => m
86 end if
87
88 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
89 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
90 else if (present(rel_tol) .and. present(abs_tol)) then
91 call this%ksp_init(max_iter, rel_tol, abs_tol)
92 else if (present(monitor) .and. present(abs_tol)) then
93 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
94 else if (present(rel_tol) .and. present(monitor)) then
95 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
96 else if (present(rel_tol)) then
97 call this%ksp_init(max_iter, rel_tol = rel_tol)
98 else if (present(abs_tol)) then
99 call this%ksp_init(max_iter, abs_tol = abs_tol)
100 else if (present(monitor)) then
101 call this%ksp_init(max_iter, monitor = monitor)
102 else
103 call this%ksp_init(max_iter)
104 end if
105
106 end subroutine cacg_init
107
109 subroutine cacg_free(this)
110 class(cacg_t), intent(inout) :: this
111
112 call this%ksp_free()
113
114 if (allocated(this%PR)) then
115 deallocate(this%PR)
116 end if
117
118 if (allocated(this%r)) then
119 deallocate(this%r)
120 end if
121
122 if (allocated(this%p)) then
123 deallocate(this%p)
124 end if
125
126 nullify(this%M)
127
128
129 end subroutine cacg_free
130
132 function cacg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
133 class(cacg_t), intent(inout) :: this
134 class(ax_t), intent(in) :: ax
135 type(field_t), intent(inout) :: x
136 integer, intent(in) :: n
137 real(kind=rp), dimension(n), intent(in) :: f
138 type(coef_t), intent(inout) :: coef
139 type(bc_list_t), intent(inout) :: blst
140 type(gs_t), intent(inout) :: gs_h
141 type(ksp_monitor_t) :: ksp_results
142 integer, optional, intent(in) :: niter
143 integer :: i, j, k, l, iter, max_iter, s, ierr, it
144 real(kind=rp) :: rnorm, rtr, rtz1, tmp
145 real(kind=rp) :: beta(this%s+1), alpha(this%s+1), alpha1, alpha2, norm_fac
146 real(kind=rp), dimension(4*this%s+1,4*this%s+1) :: tt, g, gtt, temp, temp2
147 real(kind=rp) :: p_c(4*this%s+1,this%s+1)
148 real(kind=rp) :: r_c(4*this%s+1,this%s+1)
149 real(kind=rp) :: z_c(4*this%s+1,this%s+1)
150 real(kind=rp) :: x_c(4*this%s+1,this%s+1)
151
152 associate(pr => this%PR, r => this%r, p => this%p)
153 s = this%s
154 if (present(niter)) then
155 max_iter = niter
156 else
157 max_iter = this%max_iter
158 end if
159 norm_fac = 1.0_rp / sqrt(coef%volume)
160
161 rtz1 = 1.0_rp
162 call rzero(x%x, n)
163 call copy(r, f, n)
164 call this%M%solve(p, r, n)
165
166 rtr = glsc3(r, coef%mult, r, n)
167 rnorm = sqrt(rtr)*norm_fac
168 ksp_results%res_start = rnorm
169 ksp_results%res_final = rnorm
170 ksp_results%iter = 0
171 iter = 0
172 if(abscmp(rnorm, 0.0_rp)) return
173 call this%monitor_start('CACG')
174 do while (iter < max_iter)
175
176 call copy(pr,p, n)
177 call copy(pr(1,2*s+2), r, n)
178
179 !Here we have hardcoded a monomial basis atm.
180 do i = 2, 2*s + 1
181 if (mod(i,2) .eq. 0) then
182 call ax%compute(pr(1,i), pr(1,i-1), coef, x%msh, x%Xh)
183 call gs_h%gs_op_vector(pr(1,i), n, gs_op_add)
184 call blst%apply_scalar(pr(1,i), n)
185 else
186 call this%M%solve(pr(1,i), pr(1,i-1), n)
187 end if
188 end do
189
190 do i = 2*s+2, 4*s
191 if (mod(i,2) == 0) then
192 call this%M%solve(pr(1,i+1), pr(1,i), n)
193 else
194 call ax%compute(pr(1,i+1), pr(1,i), coef, x%msh, x%Xh)
195 call gs_h%gs_op_vector(pr(1,i+1), n, gs_op_add)
196 call blst%apply_scalar(pr(1,1+i), n)
197 end if
198 end do
199
200 call construct_basis_matrix(tt, s)
201 call rzero(p_c, (4*s+1) * (s+1))
202 p_c(1,1) = 1.0_rp
203 call rzero(r_c, (4*s+1) * (s+1))
204 r_c(2*s+2,1) = 1.0_rp
205 call mxm(tt, 4*s+1, r_c, 4*s+1, z_c,s+1)
206 call rzero(x_c, (4*s+1) * (s+1))
207 call rzero(temp, (4*s+1)**2)
208
209 do i = 0, n, neko_blk_size
210 it = 0
211 if (i + neko_blk_size .le. n) then
212 do j = 1, 4*s+1
213 do l = 1, j
214 it = it + 1
215 do k = 1, neko_blk_size
216 temp(it,1) = temp(it,1) &
217 + pr(i+k,j) * pr(i+k,l) * coef%mult(i+k,1,1,1)
218 end do
219 end do
220 end do
221 else
222 do j = 1, 4*s+1
223 do l = 1, j
224 it = it + 1
225 do k = 1, n-i
226 temp(it,1) = temp(it,1) &
227 + pr(i+k,j) * pr(i+k,l) * coef%mult(i+k,1,1,1)
228 end do
229 end do
230 end do
231 end if
232 end do
233
234 call mpi_allreduce(temp, temp2, it, &
235 mpi_real_precision, mpi_sum, neko_comm, ierr)
236 it = 0
237 do j = 1, 4*s+1
238 do k = 1, j
239 it = it + 1
240 g(j,k) = temp2(it,1)
241 g(k,j) = temp2(it,1)
242 end do
243 end do
244
245 call mxm(g,4*s+1, tt, 4*s+1,gtt,4*s+1)
246
247 do j = 1, s
248 iter = iter + 1
249
250 call mxm(g, 4*s+1, r_c(1,j), 4*s+1,temp, 1)
251 call mxm(gtt, 4*s+1, p_c(1,j), 4*s+1,temp2, 1)
252 alpha1 = 0.0_rp
253 alpha2 = 0.0_rp
254 do i = 1,4*s+1
255 alpha1 = alpha1 + temp(i,1) * z_c(i,j)
256 alpha2 = alpha2 + temp2(i,1) * p_c(i,j)
257 end do
258 alpha(j) = alpha1/alpha2
259
260 do i = 1, 4*s+1
261 x_c(i,j+1) = x_c(i,j) + alpha(j) * p_c(i,j)
262 tmp = 0.0_rp
263 do k = 1, 4*s+1
264 tmp = tmp + tt(i,k) * p_c(k,j)
265 end do
266 r_c(i,j+1) = r_c(i,j) - alpha(j)*tmp
267 tmp = 0.0_rp
268 do k = 1, 4*s+1
269 tmp = tmp + tt(i,k)*r_c(k,j+1)
270 end do
271 z_c(i,j+1) = tmp
272 end do
273
274 call mxm(g,4*s+1,r_c(1,j+1),4*s+1,temp2,1)
275 alpha2 = 0.0_rp
276 do i = 1,4*s+1
277 alpha2 = alpha2 + temp2(i,1)*z_c(i,j+1)
278 end do
279 beta(j) = alpha2 / alpha1
280 do i = 1,4*s+1
281 p_c(i,j+1) = z_c(i,j+1) + beta(j)*p_c(i,j)
282 end do
283 end do
284
285 call rzero(p, n)
286 call rzero(r, n)
287 rtr = 0.0_rp
288 do i = 0, n, neko_blk_size
289 if (i + neko_blk_size .le. n) then
290 do j = 1, 4*s + 1
291 do k = 1, neko_blk_size
292 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + pr(i+k,j) * x_c(j,s+1)
293 p(i+k) = p(i+k) + pr(i+k,j) * p_c(j,s+1)
294 tmp = pr(i+k,j) * r_c(j,s+1)
295 r(i+k) = r(i+k) + tmp
296 end do
297 end do
298 do k = 1, neko_blk_size
299 rtr = rtr + r(i+k)**2 * coef%mult(i+k,1,1,1)
300 end do
301 else
302 do j = 1,4*s+1
303 do k = 1, n-i
304 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + pr(i+k,j) * x_c(j,s+1)
305 p(i+k) = p(i+k) + pr(i+k,j) * p_c(j,s+1)
306 tmp = pr(i+k,j) * r_c(j,s+1)
307 r(i+k) = r(i+k) + tmp
308 end do
309 end do
310 do k = 1, n-i
311 rtr = rtr + r(i+k)**2 * coef%mult(i+k,1,1,1)
312 end do
313 end if
314 end do
315
316 call mpi_allreduce(rtr, tmp, 1, &
317 mpi_real_precision, mpi_sum, neko_comm, ierr)
318 rnorm = norm_fac*sqrt(tmp)
319 call this%monitor_iter(iter, rnorm)
320 if( rnorm <= this%abs_tol) exit
321 end do
322 call this%monitor_stop()
323 ksp_results%res_final = rnorm
324 ksp_results%iter = iter
325 ksp_results%converged = this%is_converged(iter, rnorm)
326
327 end associate
328
329 end function cacg_solve
330
332 subroutine construct_basis_matrix(Tt, s)
333 integer, intent(in) :: s
334 real(kind=rp), intent(inout) :: tt(4*s+1,4*s+1)
335 integer :: mlen, i
336 mlen = (4*s+1)*(4*s+1)
337 call rzero(tt,mlen)
338 do i = 1, 2*s
339 tt(i+1,i) = 1.0_rp
340 end do
341 do i = 1, (2*s-1)
342 tt(2*s+2+i,2*s+1+i) = 1.0_rp
343 end do
344 end subroutine construct_basis_matrix
345
347 function cacg_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
348 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
349 class(cacg_t), intent(inout) :: this
350 class(ax_t), intent(in) :: ax
351 type(field_t), intent(inout) :: x
352 type(field_t), intent(inout) :: y
353 type(field_t), intent(inout) :: z
354 integer, intent(in) :: n
355 real(kind=rp), dimension(n), intent(in) :: fx
356 real(kind=rp), dimension(n), intent(in) :: fy
357 real(kind=rp), dimension(n), intent(in) :: fz
358 type(coef_t), intent(inout) :: coef
359 type(bc_list_t), intent(inout) :: blstx
360 type(bc_list_t), intent(inout) :: blsty
361 type(bc_list_t), intent(inout) :: blstz
362 type(gs_t), intent(inout) :: gs_h
363 type(ksp_monitor_t), dimension(3) :: ksp_results
364 integer, optional, intent(in) :: niter
365
366 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
367 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
368 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
369
370 end function cacg_solve_coupled
371
372end module cacg
373
374
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Defines a communication avoiding Conjugate Gradient method.
Definition cacg.f90:34
type(ksp_monitor_t) function cacg_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
S-step CA PCG solve.
Definition cacg.f90:133
subroutine construct_basis_matrix(tt, s)
Monomial matrix constuction, not sparse.
Definition cacg.f90:333
type(ksp_monitor_t) function, dimension(3) cacg_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
S-step CA PCG coupled solve.
Definition cacg.f90:349
subroutine cacg_free(this)
Deallocate a s-step CA PCG solver.
Definition cacg.f90:110
subroutine cacg_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a s-step CA PCG solver.
Definition cacg.f90:67
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
integer pe_rank
MPI rank.
Definition comm.F90:51
type(mpi_comm) neko_comm
MPI communicator.
Definition comm.F90:38
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:46
Defines a field.
Definition field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition krylov.f90:51
Definition math.f90:60
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:894
subroutine, public x_update(a, b, c, c1, c2, n)
Returns .
Definition math.f90:860
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:238
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:194
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Krylov preconditioner.
Definition precon.f90:34
Utilities.
Definition utils.f90:35
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Definition utils.f90:266
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:47
S-step communication avoiding preconditioned conjugate gradient method.
Definition cacg.f90:51
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:68
Defines a canonical Krylov preconditioner.
Definition precon.f90:40