Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
cg.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2026, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module cg
35 use neko_config, only : neko_blk_size
36 use num_types, only: rp, xp
38 use precon, only : pc_t
39 use ax_product, only : ax_t
40 use field, only : field_t
41 use coefs, only : coef_t
42 use gather_scatter, only : gs_t, gs_op_add
43 use bc_list, only : bc_list_t
44 use math, only : glsc3, abscmp
46 use mpi_f08, only : mpi_allreduce, mpi_in_place, mpi_sum
47 implicit none
48 private
49
50 integer, parameter :: cg_p_space = 7
51
53 type, public, extends(ksp_t) :: cg_t
54 real(kind=rp), allocatable :: w(:)
55 real(kind=rp), allocatable :: r(:)
56 real(kind=rp), allocatable :: p(:,:)
57 real(kind=rp), allocatable :: z(:)
58 real(kind=rp), allocatable :: alpha(:)
59 contains
60 procedure, pass(this) :: init => cg_init
61 procedure, pass(this) :: free => cg_free
62 procedure, pass(this) :: solve => cg_solve
63 procedure, pass(this) :: solve_coupled => cg_solve_coupled
64 end type cg_t
65
66contains
67
69 subroutine cg_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
70 class(cg_t), intent(inout), target :: this
71 integer, intent(in) :: max_iter
72 class(pc_t), optional, intent(in), target :: M
73 integer, intent(in) :: n
74 real(kind=rp), optional, intent(in) :: rel_tol
75 real(kind=rp), optional, intent(in) :: abs_tol
76 logical, optional, intent(in) :: monitor
77
78 call this%free()
79
80 allocate(this%w(n))
81 allocate(this%r(n))
82 allocate(this%p(n,cg_p_space))
83 allocate(this%z(n))
84 allocate(this%alpha(cg_p_space))
85
86 if (present(m)) then
87 this%M => m
88 end if
89 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
90 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
91 else if (present(rel_tol) .and. present(abs_tol)) then
92 call this%ksp_init(max_iter, rel_tol, abs_tol)
93 else if (present(monitor) .and. present(abs_tol)) then
94 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
95 else if (present(rel_tol) .and. present(monitor)) then
96 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
97 else if (present(rel_tol)) then
98 call this%ksp_init(max_iter, rel_tol = rel_tol)
99 else if (present(abs_tol)) then
100 call this%ksp_init(max_iter, abs_tol = abs_tol)
101 else if (present(monitor)) then
102 call this%ksp_init(max_iter, monitor = monitor)
103 else
104 call this%ksp_init(max_iter)
105 end if
106
107 end subroutine cg_init
108
110 subroutine cg_free(this)
111 class(cg_t), intent(inout) :: this
112
113 call this%ksp_free()
114
115 if (allocated(this%w)) then
116 deallocate(this%w)
117 end if
118
119 if (allocated(this%r)) then
120 deallocate(this%r)
121 end if
122
123 if (allocated(this%p)) then
124 deallocate(this%p)
125 end if
126
127 if (allocated(this%z)) then
128 deallocate(this%z)
129 end if
130
131 if (allocated(this%alpha)) then
132 deallocate(this%alpha)
133 end if
134
135 nullify(this%M)
136
137 end subroutine cg_free
138
140 function cg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
141 class(cg_t), intent(inout) :: this
142 class(ax_t), intent(in) :: ax
143 type(field_t), intent(inout) :: x
144 integer, intent(in) :: n
145 real(kind=rp), dimension(n), intent(in) :: f
146 type(coef_t), intent(inout) :: coef
147 type(bc_list_t), intent(inout) :: blst
148 type(gs_t), intent(inout) :: gs_h
149 type(ksp_monitor_t) :: ksp_results
150 integer, optional, intent(in) :: niter
151 integer :: iter, max_iter, i, j, k, p_cur, p_prev, ierr
152 real(kind=rp) :: rnorm, rtr, rtz2, rtz1, x_plus(neko_blk_size)
153 real(kind=rp) :: beta, pap, norm_fac, tmp
154
155 if (present(niter)) then
156 max_iter = niter
157 else
158 max_iter = this%max_iter
159 end if
160 norm_fac = 1.0_rp / sqrt(coef%volume)
161
162 associate(w => this%w, r => this%r, p => this%p, &
163 z => this%z, alpha => this%alpha)
164
165 rtz1 = 1.0_rp
166 rtr = 0.0_rp
167 !$omp parallel do reduction(+:rtr)
168 do i = 1, n
169 x%x(i,1,1,1) = 0.0_rp
170 p(i, cg_p_space) = 0.0_rp
171 r(i) = f(i)
172 rtr = rtr + (r(i) * coef%mult(i,1,1,1) * r(i))
173 end do
174 !$omp end parallel do
175
176 call mpi_allreduce(mpi_in_place, rtr, 1, &
177 mpi_real_precision, mpi_sum, neko_comm, ierr)
178
179 rnorm = sqrt(rtr) * norm_fac
180 ksp_results%res_start = rnorm
181 ksp_results%res_final = rnorm
182 ksp_results%iter = 0
183 if(abscmp(rnorm, 0.0_rp)) then
184 ksp_results%converged = .true.
185 return
186 end if
187
188 p_prev = cg_p_space
189 p_cur = 1
190 call this%monitor_start('CG')
191 do iter = 1, max_iter
192 call this%M%solve(z, r, n)
193 rtz2 = rtz1
194 rtz1 = glsc3(r, coef%mult, z, n)
195
196 beta = rtz1 / rtz2
197 if (iter .eq. 1) beta = 0.0_rp
198 !$omp parallel do
199 do i = 1, n
200 p(i,p_cur) = z(i) + beta * p(i,p_prev)
201 end do
202 !$omp end parallel do
203
204 call ax%compute(w, p(1,p_cur), coef, x%msh, x%Xh)
205 call gs_h%op(w, n, gs_op_add)
206 call blst%apply(w, n)
207
208 pap = glsc3(w, coef%mult, p(1,p_cur), n)
209
210 alpha(p_cur) = rtz1 / pap
211 call second_cg_part(rtr, r, coef%mult, w, alpha(p_cur), n)
212 rnorm = sqrt(rtr) * norm_fac
213 call this%monitor_iter(iter, rnorm)
214
215 if ((p_cur .eq. cg_p_space) .or. &
216 (rnorm .lt. this%abs_tol) .or. iter .eq. max_iter) then
217 !$omp parallel do private(j, k, x_plus, tmp)
218 do i = 0, n-1, neko_blk_size
219 if (i + neko_blk_size .le. n) then
220 !$omp simd
221 do k = 1, neko_blk_size
222 x_plus(k) = 0.0_rp
223 end do
224 do j = 1, p_cur
225 !$omp simd
226 do k = 1, neko_blk_size
227 x_plus(k) = x_plus(k) + alpha(j) * p(i+k,j)
228 end do
229 end do
230 !$omp simd
231 do k = 1, neko_blk_size
232 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k)
233 end do
234 else
235 do k = 1, n - i
236 tmp = 0.0_rp
237 do j = 1, p_cur
238 tmp = tmp + alpha(j) * p(i+k,j)
239 end do
240 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + tmp
241 end do
242 end if
243 end do
244 !$omp end parallel do
245 p_prev = p_cur
246 p_cur = 1
247 if (rnorm .lt. this%abs_tol) exit
248 else
249 p_prev = p_cur
250 p_cur = p_cur + 1
251 end if
252 end do
253 end associate
254 call this%monitor_stop()
255 ksp_results%res_final = rnorm
256 ksp_results%iter = iter
257 ksp_results%converged = this%is_converged(iter, rnorm)
258 end function cg_solve
259
260 subroutine second_cg_part(rtr, r, mult, w, alpha, n)
261 integer, intent(in) :: n
262 real(kind=rp), intent(inout) :: r(n), rtr
263 real(kind=xp) :: tmp
264 real(kind=rp), intent(in) ::mult(n), w(n), alpha
265 integer :: i, ierr
266
267 tmp = 0.0_xp
268 !$omp parallel do reduction(+:tmp)
269 do i = 1, n
270 r(i) = r(i) - alpha*w(i)
271 tmp = tmp + r(i) * r(i) * mult(i)
272 end do
273 !$omp end parallel do
274 call mpi_allreduce(mpi_in_place, tmp, 1, &
275 mpi_extra_precision, mpi_sum, neko_comm, ierr)
276 rtr = tmp
277
278 end subroutine second_cg_part
279
281 function cg_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
282 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
283 class(cg_t), intent(inout) :: this
284 class(ax_t), intent(in) :: ax
285 type(field_t), intent(inout) :: x
286 type(field_t), intent(inout) :: y
287 type(field_t), intent(inout) :: z
288 integer, intent(in) :: n
289 real(kind=rp), dimension(n), intent(in) :: fx
290 real(kind=rp), dimension(n), intent(in) :: fy
291 real(kind=rp), dimension(n), intent(in) :: fz
292 type(coef_t), intent(inout) :: coef
293 type(bc_list_t), intent(inout) :: blstx
294 type(bc_list_t), intent(inout) :: blsty
295 type(bc_list_t), intent(inout) :: blstz
296 type(gs_t), intent(inout) :: gs_h
297 type(ksp_monitor_t), dimension(3) :: ksp_results
298 integer, optional, intent(in) :: niter
299
300 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
301 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
302 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
303
304 end function cg_solve_coupled
305
306end module cg
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Defines various Conjugate Gradient methods.
Definition cg.f90:34
subroutine cg_free(this)
Deallocate a standard PCG solver.
Definition cg.f90:111
integer, parameter cg_p_space
Definition cg.f90:50
type(ksp_monitor_t) function, dimension(3) cg_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard PCG coupled solve.
Definition cg.f90:283
subroutine cg_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a standard PCG solver.
Definition cg.f90:70
subroutine second_cg_part(rtr, r, mult, w, alpha, n)
Definition cg.f90:261
type(ksp_monitor_t) function cg_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
Standard PCG solve.
Definition cg.f90:141
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:52
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:44
type(mpi_datatype), public mpi_extra_precision
Definition comm.F90:53
Defines a field.
Definition field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition krylov.f90:51
Definition math.f90:60
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1285
Build configurations.
integer, parameter neko_blk_size
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Krylov preconditioner.
Definition precon.f90:34
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:49
Standard preconditioned conjugate gradient method.
Definition cg.f90:53
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:63
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:73
Defines a canonical Krylov preconditioner.
Definition precon.f90:40