Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
cg.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2026, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module cg
35 use neko_config, only : neko_blk_size
36 use num_types, only: rp, xp
38 use precon, only : pc_t
39 use ax_product, only : ax_t
40 use field, only : field_t
41 use coefs, only : coef_t
42 use gather_scatter, only : gs_t, gs_op_add
43 use bc_list, only : bc_list_t
44 use math, only : glsc3, rzero, copy, abscmp
46 use mpi_f08, only : mpi_allreduce, mpi_in_place, mpi_sum
47 implicit none
48 private
49
50 integer, parameter :: cg_p_space = 7
51
53 type, public, extends(ksp_t) :: cg_t
54 real(kind=rp), allocatable :: w(:)
55 real(kind=rp), allocatable :: r(:)
56 real(kind=rp), allocatable :: p(:,:)
57 real(kind=rp), allocatable :: z(:)
58 real(kind=rp), allocatable :: alpha(:)
59 contains
60 procedure, pass(this) :: init => cg_init
61 procedure, pass(this) :: free => cg_free
62 procedure, pass(this) :: solve => cg_solve
63 procedure, pass(this) :: solve_coupled => cg_solve_coupled
64 end type cg_t
65
66contains
67
69 subroutine cg_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
70 class(cg_t), intent(inout), target :: this
71 integer, intent(in) :: max_iter
72 class(pc_t), optional, intent(in), target :: M
73 integer, intent(in) :: n
74 real(kind=rp), optional, intent(in) :: rel_tol
75 real(kind=rp), optional, intent(in) :: abs_tol
76 logical, optional, intent(in) :: monitor
77
78 call this%free()
79
80 allocate(this%w(n))
81 allocate(this%r(n))
82 allocate(this%p(n,cg_p_space))
83 allocate(this%z(n))
84 allocate(this%alpha(cg_p_space))
85
86 if (present(m)) then
87 this%M => m
88 end if
89 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
90 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
91 else if (present(rel_tol) .and. present(abs_tol)) then
92 call this%ksp_init(max_iter, rel_tol, abs_tol)
93 else if (present(monitor) .and. present(abs_tol)) then
94 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
95 else if (present(rel_tol) .and. present(monitor)) then
96 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
97 else if (present(rel_tol)) then
98 call this%ksp_init(max_iter, rel_tol = rel_tol)
99 else if (present(abs_tol)) then
100 call this%ksp_init(max_iter, abs_tol = abs_tol)
101 else if (present(monitor)) then
102 call this%ksp_init(max_iter, monitor = monitor)
103 else
104 call this%ksp_init(max_iter)
105 end if
106
107 end subroutine cg_init
108
110 subroutine cg_free(this)
111 class(cg_t), intent(inout) :: this
112
113 call this%ksp_free()
114
115 if (allocated(this%w)) then
116 deallocate(this%w)
117 end if
118
119 if (allocated(this%r)) then
120 deallocate(this%r)
121 end if
122
123 if (allocated(this%p)) then
124 deallocate(this%p)
125 end if
126
127 if (allocated(this%z)) then
128 deallocate(this%z)
129 end if
130
131 if (allocated(this%alpha)) then
132 deallocate(this%alpha)
133 end if
134
135 nullify(this%M)
136
137 end subroutine cg_free
138
140 function cg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
141 class(cg_t), intent(inout) :: this
142 class(ax_t), intent(in) :: ax
143 type(field_t), intent(inout) :: x
144 integer, intent(in) :: n
145 real(kind=rp), dimension(n), intent(in) :: f
146 type(coef_t), intent(inout) :: coef
147 type(bc_list_t), intent(inout) :: blst
148 type(gs_t), intent(inout) :: gs_h
149 type(ksp_monitor_t) :: ksp_results
150 integer, optional, intent(in) :: niter
151 integer :: iter, max_iter, i, j, k, p_cur, p_prev, blk_size
152 real(kind=rp) :: rnorm, rtr, rtz2, rtz1, x_plus(neko_blk_size)
153 real(kind=rp) :: beta, pap, norm_fac
154
155 if (present(niter)) then
156 max_iter = niter
157 else
158 max_iter = this%max_iter
159 end if
160 norm_fac = 1.0_rp / sqrt(coef%volume)
161
162 associate(w => this%w, r => this%r, p => this%p, &
163 z => this%z, alpha => this%alpha)
164
165 rtz1 = 1.0_rp
166 call rzero(x%x, n)
167 call rzero(p(1,cg_p_space), n)
168 call copy(r, f, n)
169
170 rtr = glsc3(r, coef%mult, r, n)
171 rnorm = sqrt(rtr) * norm_fac
172 ksp_results%res_start = rnorm
173 ksp_results%res_final = rnorm
174 ksp_results%iter = 0
175 if(abscmp(rnorm, 0.0_rp)) then
176 ksp_results%converged = .true.
177 return
178 end if
179
180 p_prev = cg_p_space
181 p_cur = 1
182 call this%monitor_start('CG')
183 do iter = 1, max_iter
184 call this%M%solve(z, r, n)
185 rtz2 = rtz1
186 rtz1 = glsc3(r, coef%mult, z, n)
187
188 beta = rtz1 / rtz2
189 if (iter .eq. 1) beta = 0.0_rp
190 do i = 1, n
191 p(i,p_cur) = z(i) + beta * p(i,p_prev)
192 end do
193
194 call ax%compute(w, p(1,p_cur), coef, x%msh, x%Xh)
195 call gs_h%op(w, n, gs_op_add)
196 call blst%apply(w, n)
197
198 pap = glsc3(w, coef%mult, p(1,p_cur), n)
199
200 alpha(p_cur) = rtz1 / pap
201 call second_cg_part(rtr, r, coef%mult, w, alpha(p_cur), n)
202 rnorm = sqrt(rtr) * norm_fac
203 call this%monitor_iter(iter, rnorm)
204
205 if ((p_cur .eq. cg_p_space) .or. &
206 (rnorm .lt. this%abs_tol) .or. iter .eq. max_iter) then
207 !$omp parallel do private(blk_size, j, k, x_plus)
208 do i = 0, n, neko_blk_size
209 blk_size = min(neko_blk_size, n - i)
210 do concurrent(k = 1:blk_size)
211 x_plus(k) = 0.0_rp
212 end do
213
214 do j = 1, p_cur
215 do concurrent(k = 1:blk_size)
216 x_plus(k) = x_plus(k) + alpha(j) * p(i+k,j)
217 end do
218 end do
219
220 do concurrent(k = 1:blk_size)
221 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k)
222 end do
223 end do
224 !$omp end parallel do
225 p_prev = p_cur
226 p_cur = 1
227 if (rnorm .lt. this%abs_tol) exit
228 else
229 p_prev = p_cur
230 p_cur = p_cur + 1
231 end if
232 end do
233 end associate
234 call this%monitor_stop()
235 ksp_results%res_final = rnorm
236 ksp_results%iter = iter
237 ksp_results%converged = this%is_converged(iter, rnorm)
238 end function cg_solve
239
240 subroutine second_cg_part(rtr, r, mult, w, alpha, n)
241 integer, intent(in) :: n
242 real(kind=rp), intent(inout) :: r(n), rtr
243 real(kind=xp) :: tmp
244 real(kind=rp), intent(in) ::mult(n), w(n), alpha
245 integer :: i, ierr
246
247 tmp = 0.0_xp
248 !$omp parallel do reduction(+:tmp)
249 do i = 1, n
250 r(i) = r(i) - alpha*w(i)
251 tmp = tmp + r(i) * r(i) * mult(i)
252 end do
253 !$omp end parallel do
254 call mpi_allreduce(mpi_in_place, tmp, 1, &
255 mpi_extra_precision, mpi_sum, neko_comm, ierr)
256 rtr = tmp
257
258 end subroutine second_cg_part
259
261 function cg_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
262 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
263 class(cg_t), intent(inout) :: this
264 class(ax_t), intent(in) :: ax
265 type(field_t), intent(inout) :: x
266 type(field_t), intent(inout) :: y
267 type(field_t), intent(inout) :: z
268 integer, intent(in) :: n
269 real(kind=rp), dimension(n), intent(in) :: fx
270 real(kind=rp), dimension(n), intent(in) :: fy
271 real(kind=rp), dimension(n), intent(in) :: fz
272 type(coef_t), intent(inout) :: coef
273 type(bc_list_t), intent(inout) :: blstx
274 type(bc_list_t), intent(inout) :: blsty
275 type(bc_list_t), intent(inout) :: blstz
276 type(gs_t), intent(inout) :: gs_h
277 type(ksp_monitor_t), dimension(3) :: ksp_results
278 integer, optional, intent(in) :: niter
279
280 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
281 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
282 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
283
284 end function cg_solve_coupled
285
286end module cg
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Defines various Conjugate Gradient methods.
Definition cg.f90:34
subroutine cg_free(this)
Deallocate a standard PCG solver.
Definition cg.f90:111
integer, parameter cg_p_space
Definition cg.f90:50
type(ksp_monitor_t) function, dimension(3) cg_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard PCG coupled solve.
Definition cg.f90:263
subroutine cg_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a standard PCG solver.
Definition cg.f90:70
subroutine second_cg_part(rtr, r, mult, w, alpha, n)
Definition cg.f90:241
type(ksp_monitor_t) function cg_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
Standard PCG solve.
Definition cg.f90:141
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:43
type(mpi_datatype), public mpi_extra_precision
Definition comm.F90:52
Defines a field.
Definition field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition krylov.f90:51
Definition math.f90:60
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1068
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:250
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:206
Build configurations.
integer, parameter neko_blk_size
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Krylov preconditioner.
Definition precon.f90:34
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
Standard preconditioned conjugate gradient method.
Definition cg.f90:53
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:62
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:73
Defines a canonical Krylov preconditioner.
Definition precon.f90:40