Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
cacg.f90
Go to the documentation of this file.
1! Copyright (c) 2021-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module cacg
35 use num_types, only: rp
37 use precon, only : pc_t
38 use ax_product, only : ax_t
39 use field, only : field_t
40 use coefs, only : coef_t
41 use gather_scatter, only : gs_t, gs_op_add
42 use bc_list, only : bc_list_t
43 use math, only : glsc3, rzero, copy, x_update, abscmp
44 use utils, only : neko_warning
45 use comm
46 use mxm_wrapper
47 implicit none
48 private
49
51 type, public, extends(ksp_t) :: cacg_t
52 real(kind=rp), allocatable :: r(:)
53 real(kind=rp), allocatable :: p(:)
54 real(kind=rp), allocatable :: pr(:,:)
55 integer :: s
56 contains
57 procedure, pass(this) :: init => cacg_init
58 procedure, pass(this) :: free => cacg_free
59 procedure, pass(this) :: solve => cacg_solve
60 procedure, pass(this) :: solve_coupled => cacg_solve_coupled
61 end type cacg_t
62
63contains
64
66 subroutine cacg_init(this, n, max_iter, M, s, rel_tol, abs_tol, monitor)
67 class(cacg_t), intent(inout) :: this
68 class(pc_t), optional, intent(in), target :: M
69 integer, intent(in) :: n
70 integer, intent(in) :: max_iter
71 real(kind=rp), optional, intent(in) :: rel_tol
72 real(kind=rp), optional, intent(in) :: abs_tol
73 logical, optional, intent(in) :: monitor
74 integer, optional, intent(in) :: s
75 call this%free()
76
77 if (present(s)) then
78 this%s = s
79 else
80 this%s = 4
81 end if
82 if (pe_rank .eq. 0) then
83 call neko_warning("Communication Avoiding CG chosen,&
84 & be aware of potential instabilities")
85 end if
86
87 allocate(this%r(n))
88 allocate(this%p(n))
89 allocate(this%PR(n,4*this%s+1))
90 if (present(m)) then
91 this%M => m
92 end if
93
94 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
95 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
96 else if (present(rel_tol) .and. present(abs_tol)) then
97 call this%ksp_init(max_iter, rel_tol, abs_tol)
98 else if (present(monitor) .and. present(abs_tol)) then
99 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
100 else if (present(rel_tol) .and. present(monitor)) then
101 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
102 else if (present(rel_tol)) then
103 call this%ksp_init(max_iter, rel_tol = rel_tol)
104 else if (present(abs_tol)) then
105 call this%ksp_init(max_iter, abs_tol = abs_tol)
106 else if (present(monitor)) then
107 call this%ksp_init(max_iter, monitor = monitor)
108 else
109 call this%ksp_init(max_iter)
110 end if
111
112 end subroutine cacg_init
113
115 subroutine cacg_free(this)
116 class(cacg_t), intent(inout) :: this
117
118 call this%ksp_free()
119
120 if (allocated(this%PR)) then
121 deallocate(this%PR)
122 end if
123
124 if (allocated(this%r)) then
125 deallocate(this%r)
126 end if
127
128 if (allocated(this%p)) then
129 deallocate(this%p)
130 end if
131
132 nullify(this%M)
133
134
135 end subroutine cacg_free
136
138 function cacg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
139 class(cacg_t), intent(inout) :: this
140 class(ax_t), intent(in) :: ax
141 type(field_t), intent(inout) :: x
142 integer, intent(in) :: n
143 real(kind=rp), dimension(n), intent(in) :: f
144 type(coef_t), intent(inout) :: coef
145 type(bc_list_t), intent(inout) :: blst
146 type(gs_t), intent(inout) :: gs_h
147 type(ksp_monitor_t) :: ksp_results
148 integer, optional, intent(in) :: niter
149 integer :: i, j, k, l, iter, max_iter, s, ierr, it
150 real(kind=rp) :: rnorm, rtr, rtz1, tmp
151 real(kind=rp) :: beta(this%s+1), alpha(this%s+1), alpha1, alpha2, norm_fac
152 real(kind=rp), dimension(4*this%s+1,4*this%s+1) :: tt, g, gtt, temp, temp2
153 real(kind=rp) :: p_c(4*this%s+1,this%s+1)
154 real(kind=rp) :: r_c(4*this%s+1,this%s+1)
155 real(kind=rp) :: z_c(4*this%s+1,this%s+1)
156 real(kind=rp) :: x_c(4*this%s+1,this%s+1)
157
158 associate(pr => this%PR, r => this%r, p => this%p)
159 s = this%s
160 if (present(niter)) then
161 max_iter = niter
162 else
163 max_iter = this%max_iter
164 end if
165 norm_fac = 1.0_rp / sqrt(coef%volume)
166
167 rtz1 = 1.0_rp
168 call rzero(x%x, n)
169 call copy(r, f, n)
170 call this%M%solve(p, r, n)
171
172 rtr = glsc3(r, coef%mult, r, n)
173 rnorm = sqrt(rtr)*norm_fac
174 ksp_results%res_start = rnorm
175 ksp_results%res_final = rnorm
176 ksp_results%iter = 0
177 iter = 0
178 if(abscmp(rnorm, 0.0_rp)) return
179 call this%monitor_start('CACG')
180 do while (iter < max_iter)
181
182 call copy(pr,p, n)
183 call copy(pr(1,2*s+2), r, n)
184
185 !Here we have hardcoded a monomial basis atm.
186 do i = 2, 2*s + 1
187 if (mod(i,2) .eq. 0) then
188 call ax%compute(pr(1,i), pr(1,i-1), coef, x%msh, x%Xh)
189 call gs_h%gs_op_vector(pr(1,i), n, gs_op_add)
190 call blst%apply_scalar(pr(1,i), n)
191 else
192 call this%M%solve(pr(1,i), pr(1,i-1), n)
193 end if
194 end do
195
196 do i = 2*s+2, 4*s
197 if (mod(i,2) == 0) then
198 call this%M%solve(pr(1,i+1), pr(1,i), n)
199 else
200 call ax%compute(pr(1,i+1), pr(1,i), coef, x%msh, x%Xh)
201 call gs_h%gs_op_vector(pr(1,i+1), n, gs_op_add)
202 call blst%apply_scalar(pr(1,1+i), n)
203 end if
204 end do
205
206 call construct_basis_matrix(tt, s)
207 call rzero(p_c, (4*s+1) * (s+1))
208 p_c(1,1) = 1.0_rp
209 call rzero(r_c, (4*s+1) * (s+1))
210 r_c(2*s+2,1) = 1.0_rp
211 call mxm(tt, 4*s+1, r_c, 4*s+1, z_c,s+1)
212 call rzero(x_c, (4*s+1) * (s+1))
213 call rzero(temp, (4*s+1)**2)
214
215 do i = 0, n, neko_blk_size
216 it = 0
217 if (i + neko_blk_size .le. n) then
218 do j = 1, 4*s+1
219 do l = 1, j
220 it = it + 1
221 do k = 1, neko_blk_size
222 temp(it,1) = temp(it,1) &
223 + pr(i+k,j) * pr(i+k,l) * coef%mult(i+k,1,1,1)
224 end do
225 end do
226 end do
227 else
228 do j = 1, 4*s+1
229 do l = 1, j
230 it = it + 1
231 do k = 1, n-i
232 temp(it,1) = temp(it,1) &
233 + pr(i+k,j) * pr(i+k,l) * coef%mult(i+k,1,1,1)
234 end do
235 end do
236 end do
237 end if
238 end do
239
240 call mpi_allreduce(temp, temp2, it, &
241 mpi_real_precision, mpi_sum, neko_comm, ierr)
242 it = 0
243 do j = 1, 4*s+1
244 do k = 1, j
245 it = it + 1
246 g(j,k) = temp2(it,1)
247 g(k,j) = temp2(it,1)
248 end do
249 end do
250
251 call mxm(g,4*s+1, tt, 4*s+1,gtt,4*s+1)
252
253 do j = 1, s
254 iter = iter + 1
255
256 call mxm(g, 4*s+1, r_c(1,j), 4*s+1,temp, 1)
257 call mxm(gtt, 4*s+1, p_c(1,j), 4*s+1,temp2, 1)
258 alpha1 = 0.0_rp
259 alpha2 = 0.0_rp
260 do i = 1,4*s+1
261 alpha1 = alpha1 + temp(i,1) * z_c(i,j)
262 alpha2 = alpha2 + temp2(i,1) * p_c(i,j)
263 end do
264 alpha(j) = alpha1/alpha2
265
266 do i = 1, 4*s+1
267 x_c(i,j+1) = x_c(i,j) + alpha(j) * p_c(i,j)
268 tmp = 0.0_rp
269 do k = 1, 4*s+1
270 tmp = tmp + tt(i,k) * p_c(k,j)
271 end do
272 r_c(i,j+1) = r_c(i,j) - alpha(j)*tmp
273 tmp = 0.0_rp
274 do k = 1, 4*s+1
275 tmp = tmp + tt(i,k)*r_c(k,j+1)
276 end do
277 z_c(i,j+1) = tmp
278 end do
279
280 call mxm(g,4*s+1,r_c(1,j+1),4*s+1,temp2,1)
281 alpha2 = 0.0_rp
282 do i = 1,4*s+1
283 alpha2 = alpha2 + temp2(i,1)*z_c(i,j+1)
284 end do
285 beta(j) = alpha2 / alpha1
286 do i = 1,4*s+1
287 p_c(i,j+1) = z_c(i,j+1) + beta(j)*p_c(i,j)
288 end do
289 end do
290
291 call rzero(p, n)
292 call rzero(r, n)
293 rtr = 0.0_rp
294 do i = 0, n, neko_blk_size
295 if (i + neko_blk_size .le. n) then
296 do j = 1, 4*s + 1
297 do k = 1, neko_blk_size
298 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + pr(i+k,j) * x_c(j,s+1)
299 p(i+k) = p(i+k) + pr(i+k,j) * p_c(j,s+1)
300 tmp = pr(i+k,j) * r_c(j,s+1)
301 r(i+k) = r(i+k) + tmp
302 end do
303 end do
304 do k = 1, neko_blk_size
305 rtr = rtr + r(i+k)**2 * coef%mult(i+k,1,1,1)
306 end do
307 else
308 do j = 1,4*s+1
309 do k = 1, n-i
310 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + pr(i+k,j) * x_c(j,s+1)
311 p(i+k) = p(i+k) + pr(i+k,j) * p_c(j,s+1)
312 tmp = pr(i+k,j) * r_c(j,s+1)
313 r(i+k) = r(i+k) + tmp
314 end do
315 end do
316 do k = 1, n-i
317 rtr = rtr + r(i+k)**2 * coef%mult(i+k,1,1,1)
318 end do
319 end if
320 end do
321
322 call mpi_allreduce(rtr, tmp, 1, &
323 mpi_real_precision, mpi_sum, neko_comm, ierr)
324 rnorm = norm_fac*sqrt(tmp)
325 call this%monitor_iter(iter, rnorm)
326 if( rnorm <= this%abs_tol) exit
327 end do
328 call this%monitor_stop()
329 ksp_results%res_final = rnorm
330 ksp_results%iter = iter
331 ksp_results%converged = this%is_converged(iter, rnorm)
332
333 end associate
334
335 end function cacg_solve
336
338 subroutine construct_basis_matrix(Tt, s)
339 integer, intent(in) :: s
340 real(kind=rp), intent(inout) :: tt(4*s+1,4*s+1)
341 integer :: mlen, i
342 mlen = (4*s+1)*(4*s+1)
343 call rzero(tt,mlen)
344 do i = 1, 2*s
345 tt(i+1,i) = 1.0_rp
346 end do
347 do i = 1, (2*s-1)
348 tt(2*s+2+i,2*s+1+i) = 1.0_rp
349 end do
350 end subroutine construct_basis_matrix
351
353 function cacg_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
354 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
355 class(cacg_t), intent(inout) :: this
356 class(ax_t), intent(in) :: ax
357 type(field_t), intent(inout) :: x
358 type(field_t), intent(inout) :: y
359 type(field_t), intent(inout) :: z
360 integer, intent(in) :: n
361 real(kind=rp), dimension(n), intent(in) :: fx
362 real(kind=rp), dimension(n), intent(in) :: fy
363 real(kind=rp), dimension(n), intent(in) :: fz
364 type(coef_t), intent(inout) :: coef
365 type(bc_list_t), intent(inout) :: blstx
366 type(bc_list_t), intent(inout) :: blsty
367 type(bc_list_t), intent(inout) :: blstz
368 type(gs_t), intent(inout) :: gs_h
369 type(ksp_monitor_t), dimension(3) :: ksp_results
370 integer, optional, intent(in) :: niter
371
372 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
373 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
374 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
375
376 end function cacg_solve_coupled
377
378end module cacg
379
380
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Defines a communication avoiding Conjugate Gradient method.
Definition cacg.f90:34
type(ksp_monitor_t) function cacg_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
S-step CA PCG solve.
Definition cacg.f90:139
subroutine cacg_init(this, n, max_iter, m, s, rel_tol, abs_tol, monitor)
Initialise a s-step CA PCG solver.
Definition cacg.f90:67
subroutine construct_basis_matrix(tt, s)
Monomial matrix constuction, not sparse.
Definition cacg.f90:339
type(ksp_monitor_t) function, dimension(3) cacg_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
S-step CA PCG coupled solve.
Definition cacg.f90:355
subroutine cacg_free(this)
Deallocate a s-step CA PCG solver.
Definition cacg.f90:116
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
integer pe_rank
MPI rank.
Definition comm.F90:28
type(mpi_comm) neko_comm
MPI communicator.
Definition comm.F90:16
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:23
Defines a field.
Definition field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition krylov.f90:51
Definition math.f90:60
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:894
subroutine, public x_update(a, b, c, c1, c2, n)
Returns .
Definition math.f90:860
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:238
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:194
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Krylov preconditioner.
Definition precon.f90:34
Utilities.
Definition utils.f90:35
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Definition utils.f90:266
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:46
S-step communication avoiding preconditioned conjugate gradient method.
Definition cacg.f90:51
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:68
Defines a canonical Krylov preconditioner.
Definition precon.f90:40