Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
cacg.f90
Go to the documentation of this file.
1! Copyright (c) 2021-2025, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module cacg
35 use num_types, only : rp
36 use neko_config, only : neko_blk_size
38 use precon, only : pc_t
39 use ax_product, only : ax_t
40 use field, only : field_t
41 use coefs, only : coef_t
42 use gather_scatter, only : gs_t, gs_op_add
43 use bc_list, only : bc_list_t
44 use math, only : glsc3, rzero, copy, x_update, abscmp
45 use utils, only : neko_warning
47 use mpi_f08, only : mpi_allreduce, mpi_sum
48 use mxm_wrapper
49 implicit none
50 private
51
53 type, public, extends(ksp_t) :: cacg_t
54 real(kind=rp), allocatable :: r(:)
55 real(kind=rp), allocatable :: p(:)
56 real(kind=rp), allocatable :: pr(:,:)
57 integer :: s = 4
58 contains
59 procedure, pass(this) :: init => cacg_init
60 procedure, pass(this) :: free => cacg_free
61 procedure, pass(this) :: solve => cacg_solve
62 procedure, pass(this) :: solve_coupled => cacg_solve_coupled
63 end type cacg_t
64
65contains
66
68 subroutine cacg_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
69 class(cacg_t), target, intent(inout) :: this
70 class(pc_t), optional, intent(in), target :: M
71 integer, intent(in) :: n
72 integer, intent(in) :: max_iter
73 real(kind=rp), optional, intent(in) :: rel_tol
74 real(kind=rp), optional, intent(in) :: abs_tol
75 logical, optional, intent(in) :: monitor
76 call this%free()
77
78 if (pe_rank .eq. 0) then
79 call neko_warning("Communication Avoiding CG chosen,&
80 & be aware of potential instabilities")
81 end if
82
83 allocate(this%r(n))
84 allocate(this%p(n))
85 allocate(this%PR(n,4*this%s+1))
86 if (present(m)) then
87 this%M => m
88 end if
89
90 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
91 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
92 else if (present(rel_tol) .and. present(abs_tol)) then
93 call this%ksp_init(max_iter, rel_tol, abs_tol)
94 else if (present(monitor) .and. present(abs_tol)) then
95 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
96 else if (present(rel_tol) .and. present(monitor)) then
97 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
98 else if (present(rel_tol)) then
99 call this%ksp_init(max_iter, rel_tol = rel_tol)
100 else if (present(abs_tol)) then
101 call this%ksp_init(max_iter, abs_tol = abs_tol)
102 else if (present(monitor)) then
103 call this%ksp_init(max_iter, monitor = monitor)
104 else
105 call this%ksp_init(max_iter)
106 end if
107
108 end subroutine cacg_init
109
111 subroutine cacg_free(this)
112 class(cacg_t), intent(inout) :: this
113
114 call this%ksp_free()
115
116 if (allocated(this%PR)) then
117 deallocate(this%PR)
118 end if
119
120 if (allocated(this%r)) then
121 deallocate(this%r)
122 end if
123
124 if (allocated(this%p)) then
125 deallocate(this%p)
126 end if
127
128 nullify(this%M)
129
130
131 end subroutine cacg_free
132
134 function cacg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
135 class(cacg_t), intent(inout) :: this
136 class(ax_t), intent(in) :: ax
137 type(field_t), intent(inout) :: x
138 integer, intent(in) :: n
139 real(kind=rp), dimension(n), intent(in) :: f
140 type(coef_t), intent(inout) :: coef
141 type(bc_list_t), intent(inout) :: blst
142 type(gs_t), intent(inout) :: gs_h
143 type(ksp_monitor_t) :: ksp_results
144 integer, optional, intent(in) :: niter
145 integer :: i, j, k, l, iter, max_iter, s, ierr, it
146 real(kind=rp) :: rnorm, rtr, rtz1, tmp
147 real(kind=rp) :: beta(this%s+1), alpha(this%s+1), alpha1, alpha2, norm_fac
148 real(kind=rp), dimension(4*this%s+1,4*this%s+1) :: tt, g, gtt, temp, temp2
149 real(kind=rp) :: p_c(4*this%s+1,this%s+1)
150 real(kind=rp) :: r_c(4*this%s+1,this%s+1)
151 real(kind=rp) :: z_c(4*this%s+1,this%s+1)
152 real(kind=rp) :: x_c(4*this%s+1,this%s+1)
153
154 associate(pr => this%PR, r => this%r, p => this%p)
155 s = this%s
156 if (present(niter)) then
157 max_iter = niter
158 else
159 max_iter = this%max_iter
160 end if
161 norm_fac = 1.0_rp / sqrt(coef%volume)
162
163 rtz1 = 1.0_rp
164 call rzero(x%x, n)
165 call copy(r, f, n)
166 call this%M%solve(p, r, n)
167
168 rtr = glsc3(r, coef%mult, r, n)
169 rnorm = sqrt(rtr)*norm_fac
170 ksp_results%res_start = rnorm
171 ksp_results%res_final = rnorm
172 ksp_results%iter = 0
173 iter = 0
174 if(abscmp(rnorm, 0.0_rp)) then
175 ksp_results%converged = .true.
176 end if
177 call this%monitor_start('CACG')
178 do while (iter < max_iter)
179
180 call copy(pr,p, n)
181 call copy(pr(1,2*s+2), r, n)
182
183 !Here we have hardcoded a monomial basis atm.
184 do i = 2, 2*s + 1
185 if (mod(i,2) .eq. 0) then
186 call ax%compute(pr(1,i), pr(1,i-1), coef, x%msh, x%Xh)
187 call gs_h%gs_op_vector(pr(1,i), n, gs_op_add)
188 call blst%apply_scalar(pr(1,i), n)
189 else
190 call this%M%solve(pr(1,i), pr(1,i-1), n)
191 end if
192 end do
193
194 do i = 2*s+2, 4*s
195 if (mod(i,2) == 0) then
196 call this%M%solve(pr(1,i+1), pr(1,i), n)
197 else
198 call ax%compute(pr(1,i+1), pr(1,i), coef, x%msh, x%Xh)
199 call gs_h%gs_op_vector(pr(1,i+1), n, gs_op_add)
200 call blst%apply_scalar(pr(1,1+i), n)
201 end if
202 end do
203
204 call construct_basis_matrix(tt, s)
205 call rzero(p_c, (4*s+1) * (s+1))
206 p_c(1,1) = 1.0_rp
207 call rzero(r_c, (4*s+1) * (s+1))
208 r_c(2*s+2,1) = 1.0_rp
209 call mxm(tt, 4*s+1, r_c, 4*s+1, z_c,s+1)
210 call rzero(x_c, (4*s+1) * (s+1))
211 call rzero(temp, (4*s+1)**2)
212
213 do i = 0, n, neko_blk_size
214 it = 0
215 if (i + neko_blk_size .le. n) then
216 do j = 1, 4*s+1
217 do l = 1, j
218 it = it + 1
219 do k = 1, neko_blk_size
220 temp(it,1) = temp(it,1) &
221 + pr(i+k,j) * pr(i+k,l) * coef%mult(i+k,1,1,1)
222 end do
223 end do
224 end do
225 else
226 do j = 1, 4*s+1
227 do l = 1, j
228 it = it + 1
229 do k = 1, n-i
230 temp(it,1) = temp(it,1) &
231 + pr(i+k,j) * pr(i+k,l) * coef%mult(i+k,1,1,1)
232 end do
233 end do
234 end do
235 end if
236 end do
237
238 call mpi_allreduce(temp, temp2, it, &
239 mpi_real_precision, mpi_sum, neko_comm, ierr)
240 it = 0
241 do j = 1, 4*s+1
242 do k = 1, j
243 it = it + 1
244 g(j,k) = temp2(it,1)
245 g(k,j) = temp2(it,1)
246 end do
247 end do
248
249 call mxm(g,4*s+1, tt, 4*s+1,gtt,4*s+1)
250
251 do j = 1, s
252 iter = iter + 1
253
254 call mxm(g, 4*s+1, r_c(1,j), 4*s+1,temp, 1)
255 call mxm(gtt, 4*s+1, p_c(1,j), 4*s+1,temp2, 1)
256 alpha1 = 0.0_rp
257 alpha2 = 0.0_rp
258 do i = 1,4*s+1
259 alpha1 = alpha1 + temp(i,1) * z_c(i,j)
260 alpha2 = alpha2 + temp2(i,1) * p_c(i,j)
261 end do
262 alpha(j) = alpha1/alpha2
263
264 do i = 1, 4*s+1
265 x_c(i,j+1) = x_c(i,j) + alpha(j) * p_c(i,j)
266 tmp = 0.0_rp
267 do k = 1, 4*s+1
268 tmp = tmp + tt(i,k) * p_c(k,j)
269 end do
270 r_c(i,j+1) = r_c(i,j) - alpha(j)*tmp
271 tmp = 0.0_rp
272 do k = 1, 4*s+1
273 tmp = tmp + tt(i,k)*r_c(k,j+1)
274 end do
275 z_c(i,j+1) = tmp
276 end do
277
278 call mxm(g,4*s+1,r_c(1,j+1),4*s+1,temp2,1)
279 alpha2 = 0.0_rp
280 do i = 1,4*s+1
281 alpha2 = alpha2 + temp2(i,1)*z_c(i,j+1)
282 end do
283 beta(j) = alpha2 / alpha1
284 do i = 1,4*s+1
285 p_c(i,j+1) = z_c(i,j+1) + beta(j)*p_c(i,j)
286 end do
287 end do
288
289 call rzero(p, n)
290 call rzero(r, n)
291 rtr = 0.0_rp
292 do i = 0, n, neko_blk_size
293 if (i + neko_blk_size .le. n) then
294 do j = 1, 4*s + 1
295 do k = 1, neko_blk_size
296 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + pr(i+k,j) * x_c(j,s+1)
297 p(i+k) = p(i+k) + pr(i+k,j) * p_c(j,s+1)
298 tmp = pr(i+k,j) * r_c(j,s+1)
299 r(i+k) = r(i+k) + tmp
300 end do
301 end do
302 do k = 1, neko_blk_size
303 rtr = rtr + r(i+k)**2 * coef%mult(i+k,1,1,1)
304 end do
305 else
306 do j = 1,4*s+1
307 do k = 1, n-i
308 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + pr(i+k,j) * x_c(j,s+1)
309 p(i+k) = p(i+k) + pr(i+k,j) * p_c(j,s+1)
310 tmp = pr(i+k,j) * r_c(j,s+1)
311 r(i+k) = r(i+k) + tmp
312 end do
313 end do
314 do k = 1, n-i
315 rtr = rtr + r(i+k)**2 * coef%mult(i+k,1,1,1)
316 end do
317 end if
318 end do
319
320 call mpi_allreduce(rtr, tmp, 1, &
321 mpi_real_precision, mpi_sum, neko_comm, ierr)
322 rnorm = norm_fac*sqrt(tmp)
323 call this%monitor_iter(iter, rnorm)
324 if( rnorm <= this%abs_tol) exit
325 end do
326 call this%monitor_stop()
327 ksp_results%res_final = rnorm
328 ksp_results%iter = iter
329 ksp_results%converged = this%is_converged(iter, rnorm)
330
331 end associate
332
333 end function cacg_solve
334
336 subroutine construct_basis_matrix(Tt, s)
337 integer, intent(in) :: s
338 real(kind=rp), intent(inout) :: tt(4*s+1,4*s+1)
339 integer :: mlen, i
340 mlen = (4*s+1)*(4*s+1)
341 call rzero(tt,mlen)
342 do i = 1, 2*s
343 tt(i+1,i) = 1.0_rp
344 end do
345 do i = 1, (2*s-1)
346 tt(2*s+2+i,2*s+1+i) = 1.0_rp
347 end do
348 end subroutine construct_basis_matrix
349
351 function cacg_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
352 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
353 class(cacg_t), intent(inout) :: this
354 class(ax_t), intent(in) :: ax
355 type(field_t), intent(inout) :: x
356 type(field_t), intent(inout) :: y
357 type(field_t), intent(inout) :: z
358 integer, intent(in) :: n
359 real(kind=rp), dimension(n), intent(in) :: fx
360 real(kind=rp), dimension(n), intent(in) :: fy
361 real(kind=rp), dimension(n), intent(in) :: fz
362 type(coef_t), intent(inout) :: coef
363 type(bc_list_t), intent(inout) :: blstx
364 type(bc_list_t), intent(inout) :: blsty
365 type(bc_list_t), intent(inout) :: blstz
366 type(gs_t), intent(inout) :: gs_h
367 type(ksp_monitor_t), dimension(3) :: ksp_results
368 integer, optional, intent(in) :: niter
369
370 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
371 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
372 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
373
374 end function cacg_solve_coupled
375
376end module cacg
377
378
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Defines a communication avoiding Conjugate Gradient method.
Definition cacg.f90:34
type(ksp_monitor_t) function cacg_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
S-step CA PCG solve.
Definition cacg.f90:135
subroutine construct_basis_matrix(tt, s)
Monomial matrix constuction, not sparse.
Definition cacg.f90:337
type(ksp_monitor_t) function, dimension(3) cacg_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
S-step CA PCG coupled solve.
Definition cacg.f90:353
subroutine cacg_free(this)
Deallocate a s-step CA PCG solver.
Definition cacg.f90:112
subroutine cacg_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a s-step CA PCG solver.
Definition cacg.f90:69
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:50
integer, public pe_rank
MPI rank.
Definition comm.F90:55
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:42
Defines a field.
Definition field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition krylov.f90:51
Definition math.f90:60
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1026
subroutine, public x_update(a, b, c, c1, c2, n)
Returns .
Definition math.f90:992
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:255
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:211
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Build configurations.
integer, parameter neko_blk_size
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Krylov preconditioner.
Definition precon.f90:34
Utilities.
Definition utils.f90:35
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Definition utils.f90:284
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
S-step communication avoiding preconditioned conjugate gradient method.
Definition cacg.f90:53
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:73
Defines a canonical Krylov preconditioner.
Definition precon.f90:40