Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
cg.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2025, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module cg
35 use neko_config, only : neko_blk_size
36 use num_types, only: rp, xp
38 use precon, only : pc_t
39 use ax_product, only : ax_t
40 use field, only : field_t
41 use coefs, only : coef_t
42 use gather_scatter, only : gs_t, gs_op_add
43 use bc_list, only : bc_list_t
44 use math, only : glsc3, rzero, copy, abscmp
46 use mpi_f08, only : mpi_allreduce, mpi_in_place, mpi_sum
47 implicit none
48 private
49
50 integer, parameter :: cg_p_space = 7
51
53 type, public, extends(ksp_t) :: cg_t
54 real(kind=rp), allocatable :: w(:)
55 real(kind=rp), allocatable :: r(:)
56 real(kind=rp), allocatable :: p(:,:)
57 real(kind=rp), allocatable :: z(:)
58 real(kind=rp), allocatable :: alpha(:)
59 contains
60 procedure, pass(this) :: init => cg_init
61 procedure, pass(this) :: free => cg_free
62 procedure, pass(this) :: solve => cg_solve
63 procedure, pass(this) :: solve_coupled => cg_solve_coupled
64 end type cg_t
65
66contains
67
69 subroutine cg_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
70 class(cg_t), intent(inout), target :: this
71 integer, intent(in) :: max_iter
72 class(pc_t), optional, intent(in), target :: M
73 integer, intent(in) :: n
74 real(kind=rp), optional, intent(in) :: rel_tol
75 real(kind=rp), optional, intent(in) :: abs_tol
76 logical, optional, intent(in) :: monitor
77
78 call this%free()
79
80 allocate(this%w(n))
81 allocate(this%r(n))
82 allocate(this%p(n,cg_p_space))
83 allocate(this%z(n))
84 allocate(this%alpha(cg_p_space))
85
86 if (present(m)) then
87 this%M => m
88 end if
89 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
90 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
91 else if (present(rel_tol) .and. present(abs_tol)) then
92 call this%ksp_init(max_iter, rel_tol, abs_tol)
93 else if (present(monitor) .and. present(abs_tol)) then
94 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
95 else if (present(rel_tol) .and. present(monitor)) then
96 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
97 else if (present(rel_tol)) then
98 call this%ksp_init(max_iter, rel_tol = rel_tol)
99 else if (present(abs_tol)) then
100 call this%ksp_init(max_iter, abs_tol = abs_tol)
101 else if (present(monitor)) then
102 call this%ksp_init(max_iter, monitor = monitor)
103 else
104 call this%ksp_init(max_iter)
105 end if
106
107 end subroutine cg_init
108
110 subroutine cg_free(this)
111 class(cg_t), intent(inout) :: this
112
113 call this%ksp_free()
114
115 if (allocated(this%w)) then
116 deallocate(this%w)
117 end if
118
119 if (allocated(this%r)) then
120 deallocate(this%r)
121 end if
122
123 if (allocated(this%p)) then
124 deallocate(this%p)
125 end if
126
127 if (allocated(this%z)) then
128 deallocate(this%z)
129 end if
130
131 if (allocated(this%alpha)) then
132 deallocate(this%alpha)
133 end if
134
135 nullify(this%M)
136
137 end subroutine cg_free
138
140 function cg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
141 class(cg_t), intent(inout) :: this
142 class(ax_t), intent(in) :: ax
143 type(field_t), intent(inout) :: x
144 integer, intent(in) :: n
145 real(kind=rp), dimension(n), intent(in) :: f
146 type(coef_t), intent(inout) :: coef
147 type(bc_list_t), intent(inout) :: blst
148 type(gs_t), intent(inout) :: gs_h
149 type(ksp_monitor_t) :: ksp_results
150 integer, optional, intent(in) :: niter
151 integer :: iter, max_iter, i, j, k, p_cur, p_prev
152 real(kind=rp) :: rnorm, rtr, rtz2, rtz1, x_plus(neko_blk_size)
153 real(kind=rp) :: beta, pap, norm_fac
154
155 if (present(niter)) then
156 max_iter = niter
157 else
158 max_iter = this%max_iter
159 end if
160 norm_fac = 1.0_rp / sqrt(coef%volume)
161
162 associate(w => this%w, r => this%r, p => this%p, &
163 z => this%z, alpha => this%alpha)
164
165 rtz1 = 1.0_rp
166 call rzero(x%x, n)
167 call rzero(p(1,cg_p_space), n)
168 call copy(r, f, n)
169
170 rtr = glsc3(r, coef%mult, r, n)
171 rnorm = sqrt(rtr) * norm_fac
172 ksp_results%res_start = rnorm
173 ksp_results%res_final = rnorm
174 ksp_results%iter = 0
175 if(abscmp(rnorm, 0.0_rp)) then
176 ksp_results%converged = .true.
177 return
178 end if
179
180 p_prev = cg_p_space
181 p_cur = 1
182 call this%monitor_start('CG')
183 do iter = 1, max_iter
184 call this%M%solve(z, r, n)
185 rtz2 = rtz1
186 rtz1 = glsc3(r, coef%mult, z, n)
187
188 beta = rtz1 / rtz2
189 if (iter .eq. 1) beta = 0.0_rp
190 do i = 1, n
191 p(i,p_cur) = z(i) + beta * p(i,p_prev)
192 end do
193
194 call ax%compute(w, p(1,p_cur), coef, x%msh, x%Xh)
195 call gs_h%op(w, n, gs_op_add)
196 call blst%apply(w, n)
197
198 pap = glsc3(w, coef%mult, p(1,p_cur), n)
199
200 alpha(p_cur) = rtz1 / pap
201 call second_cg_part(rtr, r, coef%mult, w, alpha(p_cur), n)
202 rnorm = sqrt(rtr) * norm_fac
203 call this%monitor_iter(iter, rnorm)
204
205 if ((p_cur .eq. cg_p_space) .or. &
206 (rnorm .lt. this%abs_tol) .or. iter .eq. max_iter) then
207 do i = 0, n, neko_blk_size
208 if (i + neko_blk_size .le. n) then
209 do k = 1, neko_blk_size
210 x_plus(k) = 0.0_rp
211 end do
212 do j = 1, p_cur
213 do k = 1, neko_blk_size
214 x_plus(k) = x_plus(k) + alpha(j) * p(i+k,j)
215 end do
216 end do
217 do k = 1, neko_blk_size
218 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k)
219 end do
220 else
221 do k = 1, n-i
222 x_plus(1) = 0.0_rp
223 do j = 1, p_cur
224 x_plus(1) = x_plus(1) + alpha(j) * p(i+k,j)
225 end do
226 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(1)
227 end do
228 end if
229 end do
230 p_prev = p_cur
231 p_cur = 1
232 if (rnorm .lt. this%abs_tol) exit
233 else
234 p_prev = p_cur
235 p_cur = p_cur + 1
236 end if
237 end do
238 end associate
239 call this%monitor_stop()
240 ksp_results%res_final = rnorm
241 ksp_results%iter = iter
242 ksp_results%converged = this%is_converged(iter, rnorm)
243 end function cg_solve
244
245 subroutine second_cg_part(rtr, r, mult, w, alpha, n)
246 integer, intent(in) :: n
247 real(kind=rp), intent(inout) :: r(n), rtr
248 real(kind=xp) :: tmp
249 real(kind=rp), intent(in) ::mult(n), w(n), alpha
250 integer :: i, ierr
251
252 tmp = 0.0_xp
253 do i = 1, n
254 r(i) = r(i) - alpha*w(i)
255 tmp = tmp + r(i) * r(i) * mult(i)
256 end do
257 call mpi_allreduce(mpi_in_place, tmp, 1, &
258 mpi_extra_precision, mpi_sum, neko_comm, ierr)
259 rtr = tmp
260
261 end subroutine second_cg_part
262
264 function cg_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
265 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
266 class(cg_t), intent(inout) :: this
267 class(ax_t), intent(in) :: ax
268 type(field_t), intent(inout) :: x
269 type(field_t), intent(inout) :: y
270 type(field_t), intent(inout) :: z
271 integer, intent(in) :: n
272 real(kind=rp), dimension(n), intent(in) :: fx
273 real(kind=rp), dimension(n), intent(in) :: fy
274 real(kind=rp), dimension(n), intent(in) :: fz
275 type(coef_t), intent(inout) :: coef
276 type(bc_list_t), intent(inout) :: blstx
277 type(bc_list_t), intent(inout) :: blsty
278 type(bc_list_t), intent(inout) :: blstz
279 type(gs_t), intent(inout) :: gs_h
280 type(ksp_monitor_t), dimension(3) :: ksp_results
281 integer, optional, intent(in) :: niter
282
283 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
284 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
285 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
286
287 end function cg_solve_coupled
288
289end module cg
290
291
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Defines various Conjugate Gradient methods.
Definition cg.f90:34
subroutine cg_free(this)
Deallocate a standard PCG solver.
Definition cg.f90:111
integer, parameter cg_p_space
Definition cg.f90:50
type(ksp_monitor_t) function, dimension(3) cg_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard PCG coupled solve.
Definition cg.f90:266
subroutine cg_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a standard PCG solver.
Definition cg.f90:70
subroutine second_cg_part(rtr, r, mult, w, alpha, n)
Definition cg.f90:246
type(ksp_monitor_t) function cg_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
Standard PCG solve.
Definition cg.f90:141
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:42
type(mpi_datatype), public mpi_extra_precision
Definition comm.F90:51
Defines a field.
Definition field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition krylov.f90:51
Definition math.f90:60
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1026
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:255
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:211
Build configurations.
integer, parameter neko_blk_size
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Krylov preconditioner.
Definition precon.f90:34
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
Standard preconditioned conjugate gradient method.
Definition cg.f90:53
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:73
Defines a canonical Krylov preconditioner.
Definition precon.f90:40