Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
pipecg.f90
Go to the documentation of this file.
1! Copyright (c) 2021-2026, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module pipecg
35 use neko_config, only : neko_blk_size
37 use precon, only : pc_t
38 use ax_product, only : ax_t
39 use num_types, only: rp
40 use field, only : field_t
41 use coefs, only : coef_t
42 use gather_scatter, only : gs_t, gs_op_add
43 use bc_list, only : bc_list_t
44 use math, only : glsc3, rzero, copy, abscmp
46 use mpi_f08, only : mpi_iallreduce, mpi_in_place, mpi_sum, mpi_wait, &
47 mpi_request, mpi_status
48 implicit none
49 private
50
51 integer, parameter :: pipecg_p_space = 7
52
54 type, public, extends(ksp_t) :: pipecg_t
55 real(kind=rp), allocatable :: p(:)
56 real(kind=rp), allocatable :: q(:)
57 real(kind=rp), allocatable :: r(:)
58 real(kind=rp), allocatable :: s(:)
59 real(kind=rp), allocatable :: u(:,:)
60 real(kind=rp), allocatable :: w(:)
61 real(kind=rp), allocatable :: z(:)
62 real(kind=rp), allocatable :: mi(:)
63 real(kind=rp), allocatable :: ni(:)
64 contains
66 procedure, pass(this) :: init => pipecg_init
68 procedure, pass(this) :: free => pipecg_free
70 procedure, pass(this) :: solve => pipecg_solve
72 procedure, pass(this) :: solve_coupled => pipecg_solve_coupled
73 end type pipecg_t
74
75contains
76
78 subroutine pipecg_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
79 class(pipecg_t), target, intent(inout) :: this
80 integer, intent(in) :: max_iter
81 class(pc_t), optional, intent(in), target :: M
82 integer, intent(in) :: n
83 real(kind=rp), optional, intent(in) :: rel_tol
84 real(kind=rp), optional, intent(in) :: abs_tol
85 logical, optional, intent(in) :: monitor
86
87 call this%free()
88
89 allocate(this%p(n))
90 allocate(this%q(n))
91 allocate(this%r(n))
92 allocate(this%s(n))
93 allocate(this%u(n,pipecg_p_space+1))
94 allocate(this%w(n))
95 allocate(this%z(n))
96 allocate(this%mi(n))
97 allocate(this%ni(n))
98 if (present(m)) then
99 this%M => m
100 end if
101
102 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
103 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
104 else if (present(rel_tol) .and. present(abs_tol)) then
105 call this%ksp_init(max_iter, rel_tol, abs_tol)
106 else if (present(monitor) .and. present(abs_tol)) then
107 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
108 else if (present(rel_tol) .and. present(monitor)) then
109 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
110 else if (present(rel_tol)) then
111 call this%ksp_init(max_iter, rel_tol = rel_tol)
112 else if (present(abs_tol)) then
113 call this%ksp_init(max_iter, abs_tol = abs_tol)
114 else if (present(monitor)) then
115 call this%ksp_init(max_iter, monitor = monitor)
116 else
117 call this%ksp_init(max_iter)
118 end if
119
120 end subroutine pipecg_init
121
123 subroutine pipecg_free(this)
124 class(pipecg_t), intent(inout) :: this
125
126 call this%ksp_free()
127
128 if (allocated(this%p)) then
129 deallocate(this%p)
130 end if
131 if (allocated(this%q)) then
132 deallocate(this%q)
133 end if
134 if (allocated(this%r)) then
135 deallocate(this%r)
136 end if
137 if (allocated(this%s)) then
138 deallocate(this%s)
139 end if
140 if (allocated(this%u)) then
141 deallocate(this%u)
142 end if
143 if (allocated(this%w)) then
144 deallocate(this%w)
145 end if
146 if (allocated(this%z)) then
147 deallocate(this%z)
148 end if
149 if (allocated(this%mi)) then
150 deallocate(this%mi)
151 end if
152 if (allocated(this%ni)) then
153 deallocate(this%ni)
154 end if
155
156 nullify(this%M)
157
158
159 end subroutine pipecg_free
160
162 function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
163 class(pipecg_t), intent(inout) :: this
164 class(ax_t), intent(in) :: ax
165 type(field_t), intent(inout) :: x
166 integer, intent(in) :: n
167 real(kind=rp), dimension(n), intent(in) :: f
168 type(coef_t), intent(inout) :: coef
169 type(bc_list_t), intent(inout) :: blst
170 type(gs_t), intent(inout) :: gs_h
171 type(ksp_monitor_t) :: ksp_results
172 integer, optional, intent(in) :: niter
173 integer :: iter, max_iter, i, j, k, ierr, p_cur, p_prev, u_prev
174 real(kind=rp) :: rnorm, rtr, reduction(3), norm_fac
175 real(kind=rp) :: alpha(pipecg_p_space), beta(pipecg_p_space)
176 real(kind=rp) :: gamma1, gamma2, delta
177 real(kind=rp) :: tmp1, tmp2, tmp3, x_plus(neko_blk_size)
178 type(mpi_request) :: request
179 type(mpi_status) :: status
180
181 if (present(niter)) then
182 max_iter = niter
183 else
184 max_iter = this%max_iter
185 end if
186 norm_fac = 1.0_rp / sqrt(coef%volume)
187
188 associate(p => this%p, q => this%q, r => this%r, s => this%s, &
189 u => this%u, w => this%w, z => this%z, mi => this%mi, ni => this%ni)
190
191 p_prev = pipecg_p_space
192 u_prev = pipecg_p_space+1
193 p_cur = 1
194 call rzero(x%x, n)
195 call rzero(z, n)
196 call rzero(q, n)
197 call rzero(p, n)
198 call rzero(s, n)
199 call copy(r, f, n)
200 call this%M%solve(u(1,u_prev), r, n)
201 call ax%compute(w, u(1,u_prev), coef, x%msh, x%Xh)
202 call gs_h%op(w, n, gs_op_add)
203 call blst%apply(w, n)
204
205 rtr = glsc3(r, coef%mult, r, n)
206 rnorm = sqrt(rtr)*norm_fac
207 ksp_results%res_start = rnorm
208 ksp_results%res_final = rnorm
209 ksp_results%iter = 0
210
211 if(abscmp(rnorm, 0.0_rp)) then
212 ksp_results%converged = .true.
213 return
214 end if
215
216 gamma1 = 0.0_rp
217 tmp1 = 0.0_rp
218 tmp2 = 0.0_rp
219 tmp3 = 0.0_rp
220 !$omp parallel do reduction(+:tmp1,tmp2,tmp3)
221 do i = 1, n
222 tmp1 = tmp1 + r(i) * coef%mult(i,1,1,1) * u(i,u_prev)
223 tmp2 = tmp2 + w(i) * coef%mult(i,1,1,1) * u(i,u_prev)
224 tmp3 = tmp3 + r(i) * coef%mult(i,1,1,1) * r(i)
225 end do
226 !$omp end parallel do
227 reduction(1) = tmp1
228 reduction(2) = tmp2
229 reduction(3) = tmp3
230
231 call this%monitor_start('PipeCG')
232 do iter = 1, max_iter
233 call mpi_iallreduce(mpi_in_place, reduction, 3, &
234 mpi_real_precision, mpi_sum, neko_comm, request, ierr)
235
236 call this%M%solve(mi, w, n)
237 call ax%compute(ni, mi, coef, x%msh, x%Xh)
238 call gs_h%op(ni, n, gs_op_add)
239 call blst%apply(ni, n)
240
241 call mpi_wait(request, status, ierr)
242 gamma2 = gamma1
243 gamma1 = reduction(1)
244 delta = reduction(2)
245 rtr = reduction(3)
246
247 rnorm = sqrt(rtr)*norm_fac
248 call this%monitor_iter(iter, rnorm)
249 if (rnorm .lt. this%abs_tol) exit
250
251 if (iter .gt. 1) then
252 beta(p_cur) = gamma1 / gamma2
253 alpha(p_cur) = gamma1 / (delta - (beta(p_cur) * gamma1/alpha(p_prev)))
254 else
255 beta(p_cur) = 0.0_rp
256 alpha(p_cur) = gamma1/delta
257 end if
258
259 tmp1 = 0.0_rp
260 tmp2 = 0.0_rp
261 tmp3 = 0.0_rp
262 !$omp parallel do private(k) reduction(+:tmp1,tmp2,tmp3)
263 do i = 0, n, neko_blk_size
264 do k = 1, min(neko_blk_size, n - i)
265 z(i+k) = beta(p_cur) * z(i+k) + ni(i+k)
266 q(i+k) = beta(p_cur) * q(i+k) + mi(i+k)
267 s(i+k) = beta(p_cur) * s(i+k) + w(i+k)
268 r(i+k) = r(i+k) - alpha(p_cur) * s(i+k)
269 u(i+k,p_cur) = u(i+k,u_prev) - alpha(p_cur) * q(i+k)
270 w(i+k) = w(i+k) - alpha(p_cur) * z(i+k)
271 tmp1 = tmp1 + r(i+k) * coef%mult(i+k,1,1,1) * u(i+k,p_cur)
272 tmp2 = tmp2 + w(i+k) * coef%mult(i+k,1,1,1) * u(i+k,p_cur)
273 tmp3 = tmp3 + r(i+k) * coef%mult(i+k,1,1,1) * r(i+k)
274 end do
275 end do
276 !$omp end parallel do
277 reduction(1) = tmp1
278 reduction(2) = tmp2
279 reduction(3) = tmp3
280
281 if (p_cur .eq. pipecg_p_space) then
282 !$omp parallel do private(k, j, p_prev, x_plus)
283 do i = 0, n, neko_blk_size
284 do k = 1, min(neko_blk_size, n - i)
285 x_plus(k) = 0.0_rp
286 end do
287 p_prev = pipecg_p_space+1
288 do j = 1, p_cur
289 do k = 1, min(neko_blk_size, n - i)
290 p(i+k) = beta(j) * p(i+k) + u(i+k,p_prev)
291 x_plus(k) = x_plus(k) + alpha(j) * p(i+k)
292 end do
293 p_prev = j
294 end do
295 do k = 1, min(neko_blk_size, n - i)
296 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k)
297 u(i+k,pipecg_p_space+1) = u(i+k,pipecg_p_space)
298 end do
299 end do
300 !$omp end parallel do
301 p_prev = p_cur
302 u_prev = pipecg_p_space+1
303 alpha(1) = alpha(p_cur)
304 beta(1) = beta(p_cur)
305 p_cur = 1
306 else
307 u_prev = p_cur
308 p_prev = p_cur
309 p_cur = p_cur + 1
310 end if
311 end do
312
313 if ( p_cur .ne. 1) then
314 !$omp parallel do private(k, j, p_prev, x_plus)
315 do i = 0, n, neko_blk_size
316 do k = 1, min(neko_blk_size, n - i)
317 x_plus(k) = 0.0_rp
318 end do
319 p_prev = pipecg_p_space+1
320 do j = 1, p_cur
321 do k = 1, min(neko_blk_size, n - i)
322 p(i+k) = beta(j) * p(i+k) + u(i+k,p_prev)
323 x_plus(k) = x_plus(k) + alpha(j) * p(i+k)
324 end do
325 p_prev = j
326 end do
327 do k = 1, min(neko_blk_size, n - i)
328 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k)
329 u(i+k,pipecg_p_space+1) = u(i+k,pipecg_p_space)
330 end do
331 end do
332 !$omp end parallel do
333 end if
334 call this%monitor_stop()
335 ksp_results%res_final = rnorm
336 ksp_results%iter = iter
337 ksp_results%converged = this%is_converged(iter, rnorm)
338
339 end associate
340
341 end function pipecg_solve
342
344 function pipecg_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
345 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
346 class(pipecg_t), intent(inout) :: this
347 class(ax_t), intent(in) :: ax
348 type(field_t), intent(inout) :: x
349 type(field_t), intent(inout) :: y
350 type(field_t), intent(inout) :: z
351 integer, intent(in) :: n
352 real(kind=rp), dimension(n), intent(in) :: fx
353 real(kind=rp), dimension(n), intent(in) :: fy
354 real(kind=rp), dimension(n), intent(in) :: fz
355 type(coef_t), intent(inout) :: coef
356 type(bc_list_t), intent(inout) :: blstx
357 type(bc_list_t), intent(inout) :: blsty
358 type(bc_list_t), intent(inout) :: blstz
359 type(gs_t), intent(inout) :: gs_h
360 type(ksp_monitor_t), dimension(3) :: ksp_results
361 integer, optional, intent(in) :: niter
362
363 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
364 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
365 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
366
367 end function pipecg_solve_coupled
368
369end module pipecg
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:51
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:43
Defines a field.
Definition field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition krylov.f90:51
Definition math.f90:60
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1068
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:250
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:206
Build configurations.
integer, parameter neko_blk_size
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a pipelined Conjugate Gradient methods.
Definition pipecg.f90:34
subroutine pipecg_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a pipelined PCG solver.
Definition pipecg.f90:79
subroutine pipecg_free(this)
Deallocate a pipelined PCG solver.
Definition pipecg.f90:124
integer, parameter pipecg_p_space
Definition pipecg.f90:51
type(ksp_monitor_t) function pipecg_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
Pipelined PCG solve.
Definition pipecg.f90:163
type(ksp_monitor_t) function, dimension(3) pipecg_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Pipelined PCG coupled solve.
Definition pipecg.f90:346
Krylov preconditioner.
Definition precon.f90:34
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:62
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:73
Pipelined preconditioned conjugate gradient method.
Definition pipecg.f90:54
Defines a canonical Krylov preconditioner.
Definition precon.f90:40