Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
pipecg.f90
Go to the documentation of this file.
1! Copyright (c) 2021-2026, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module pipecg
35 use neko_config, only : neko_blk_size
37 use precon, only : pc_t
38 use ax_product, only : ax_t
39 use num_types, only: rp
40 use field, only : field_t
41 use coefs, only : coef_t
42 use gather_scatter, only : gs_t, gs_op_add
43 use bc_list, only : bc_list_t
44 use math, only : glsc3, abscmp
46 use mpi_f08, only : mpi_iallreduce, mpi_in_place, mpi_sum, mpi_wait, &
47 mpi_request, mpi_status
48 implicit none
49 private
50
51 integer, parameter :: pipecg_p_space = 7
52
54 type, public, extends(ksp_t) :: pipecg_t
55 real(kind=rp), allocatable :: p(:)
56 real(kind=rp), allocatable :: q(:)
57 real(kind=rp), allocatable :: r(:)
58 real(kind=rp), allocatable :: s(:)
59 real(kind=rp), allocatable :: u(:,:)
60 real(kind=rp), allocatable :: w(:)
61 real(kind=rp), allocatable :: z(:)
62 real(kind=rp), allocatable :: mi(:)
63 real(kind=rp), allocatable :: ni(:)
64 contains
66 procedure, pass(this) :: init => pipecg_init
68 procedure, pass(this) :: free => pipecg_free
70 procedure, pass(this) :: solve => pipecg_solve
72 procedure, pass(this) :: solve_coupled => pipecg_solve_coupled
73 end type pipecg_t
74
75contains
76
78 subroutine pipecg_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
79 class(pipecg_t), target, intent(inout) :: this
80 integer, intent(in) :: max_iter
81 class(pc_t), optional, intent(in), target :: M
82 integer, intent(in) :: n
83 real(kind=rp), optional, intent(in) :: rel_tol
84 real(kind=rp), optional, intent(in) :: abs_tol
85 logical, optional, intent(in) :: monitor
86
87 call this%free()
88
89 allocate(this%p(n))
90 allocate(this%q(n))
91 allocate(this%r(n))
92 allocate(this%s(n))
93 allocate(this%u(n,pipecg_p_space+1))
94 allocate(this%w(n))
95 allocate(this%z(n))
96 allocate(this%mi(n))
97 allocate(this%ni(n))
98 if (present(m)) then
99 this%M => m
100 end if
101
102 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
103 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
104 else if (present(rel_tol) .and. present(abs_tol)) then
105 call this%ksp_init(max_iter, rel_tol, abs_tol)
106 else if (present(monitor) .and. present(abs_tol)) then
107 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
108 else if (present(rel_tol) .and. present(monitor)) then
109 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
110 else if (present(rel_tol)) then
111 call this%ksp_init(max_iter, rel_tol = rel_tol)
112 else if (present(abs_tol)) then
113 call this%ksp_init(max_iter, abs_tol = abs_tol)
114 else if (present(monitor)) then
115 call this%ksp_init(max_iter, monitor = monitor)
116 else
117 call this%ksp_init(max_iter)
118 end if
119
120 end subroutine pipecg_init
121
123 subroutine pipecg_free(this)
124 class(pipecg_t), intent(inout) :: this
125
126 call this%ksp_free()
127
128 if (allocated(this%p)) then
129 deallocate(this%p)
130 end if
131 if (allocated(this%q)) then
132 deallocate(this%q)
133 end if
134 if (allocated(this%r)) then
135 deallocate(this%r)
136 end if
137 if (allocated(this%s)) then
138 deallocate(this%s)
139 end if
140 if (allocated(this%u)) then
141 deallocate(this%u)
142 end if
143 if (allocated(this%w)) then
144 deallocate(this%w)
145 end if
146 if (allocated(this%z)) then
147 deallocate(this%z)
148 end if
149 if (allocated(this%mi)) then
150 deallocate(this%mi)
151 end if
152 if (allocated(this%ni)) then
153 deallocate(this%ni)
154 end if
155
156 nullify(this%M)
157
158
159 end subroutine pipecg_free
160
162 function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
163 class(pipecg_t), intent(inout) :: this
164 class(ax_t), intent(in) :: ax
165 type(field_t), intent(inout) :: x
166 integer, intent(in) :: n
167 real(kind=rp), dimension(n), intent(in) :: f
168 type(coef_t), intent(inout) :: coef
169 type(bc_list_t), intent(inout) :: blst
170 type(gs_t), intent(inout) :: gs_h
171 type(ksp_monitor_t) :: ksp_results
172 integer, optional, intent(in) :: niter
173 integer :: iter, max_iter, i, j, k, ierr, p_cur, p_prev, u_prev
174 real(kind=rp) :: rnorm, rtr, reduction(3), norm_fac
175 real(kind=rp) :: alpha(pipecg_p_space), beta(pipecg_p_space)
176 real(kind=rp) :: gamma1, gamma2, delta
177 real(kind=rp) :: tmp1, tmp2, tmp3, x_plus(neko_blk_size)
178 type(mpi_request) :: request
179 type(mpi_status) :: status
180
181 if (present(niter)) then
182 max_iter = niter
183 else
184 max_iter = this%max_iter
185 end if
186 norm_fac = 1.0_rp / sqrt(coef%volume)
187
188 associate(p => this%p, q => this%q, r => this%r, s => this%s, &
189 u => this%u, w => this%w, z => this%z, mi => this%mi, ni => this%ni)
190
191 p_prev = pipecg_p_space
192 u_prev = pipecg_p_space+1
193 p_cur = 1
194 !$omp parallel do
195 do i = 1, n
196 x%x(i,1,1,1) = 0.0_rp
197 z(i) = 0.0_rp
198 q(i) = 0.0_rp
199 p(i) = 0.0_rp
200 s(i) = 0.0_rp
201 r(i) = f(i)
202 end do
203 !$omp end parallel do
204 call this%M%solve(u(1,u_prev), r, n)
205 call ax%compute(w, u(1,u_prev), coef, x%msh, x%Xh)
206 call gs_h%op(w, n, gs_op_add)
207 call blst%apply(w, n)
208
209 rtr = glsc3(r, coef%mult, r, n)
210 rnorm = sqrt(rtr)*norm_fac
211 ksp_results%res_start = rnorm
212 ksp_results%res_final = rnorm
213 ksp_results%iter = 0
214
215 if(abscmp(rnorm, 0.0_rp)) then
216 ksp_results%converged = .true.
217 return
218 end if
219
220 gamma1 = 0.0_rp
221 tmp1 = 0.0_rp
222 tmp2 = 0.0_rp
223 tmp3 = 0.0_rp
224 !$omp parallel do reduction(+:tmp1,tmp2,tmp3)
225 do i = 1, n
226 tmp1 = tmp1 + r(i) * coef%mult(i,1,1,1) * u(i,u_prev)
227 tmp2 = tmp2 + w(i) * coef%mult(i,1,1,1) * u(i,u_prev)
228 tmp3 = tmp3 + r(i) * coef%mult(i,1,1,1) * r(i)
229 end do
230 !$omp end parallel do
231 reduction(1) = tmp1
232 reduction(2) = tmp2
233 reduction(3) = tmp3
234
235 call this%monitor_start('PipeCG')
236 do iter = 1, max_iter
237 call mpi_iallreduce(mpi_in_place, reduction, 3, &
238 mpi_real_precision, mpi_sum, neko_comm, request, ierr)
239
240 call this%M%solve(mi, w, n)
241 call ax%compute(ni, mi, coef, x%msh, x%Xh)
242 call gs_h%op(ni, n, gs_op_add)
243 call blst%apply(ni, n)
244
245 call mpi_wait(request, status, ierr)
246 gamma2 = gamma1
247 gamma1 = reduction(1)
248 delta = reduction(2)
249 rtr = reduction(3)
250
251 rnorm = sqrt(rtr)*norm_fac
252 call this%monitor_iter(iter, rnorm)
253 if (rnorm .lt. this%abs_tol) exit
254
255 if (iter .gt. 1) then
256 beta(p_cur) = gamma1 / gamma2
257 alpha(p_cur) = gamma1 / (delta - (beta(p_cur) * gamma1/alpha(p_prev)))
258 else
259 beta(p_cur) = 0.0_rp
260 alpha(p_cur) = gamma1/delta
261 end if
262
263 tmp1 = 0.0_rp
264 tmp2 = 0.0_rp
265 tmp3 = 0.0_rp
266 !$omp parallel do private(k) reduction(+:tmp1,tmp2,tmp3)
267 do i = 0, n-1, neko_blk_size
268 if (i + neko_blk_size .le. n) then
269 !$omp simd reduction(+:tmp1,tmp2,tmp3)
270 do k = 1, neko_blk_size
271 z(i+k) = beta(p_cur) * z(i+k) + ni(i+k)
272 q(i+k) = beta(p_cur) * q(i+k) + mi(i+k)
273 s(i+k) = beta(p_cur) * s(i+k) + w(i+k)
274 r(i+k) = r(i+k) - alpha(p_cur) * s(i+k)
275 u(i+k,p_cur) = u(i+k,u_prev) - alpha(p_cur) * q(i+k)
276 w(i+k) = w(i+k) - alpha(p_cur) * z(i+k)
277 tmp1 = tmp1 + r(i+k) * coef%mult(i+k,1,1,1) * u(i+k,p_cur)
278 tmp2 = tmp2 + w(i+k) * coef%mult(i+k,1,1,1) * u(i+k,p_cur)
279 tmp3 = tmp3 + r(i+k) * coef%mult(i+k,1,1,1) * r(i+k)
280 end do
281 else
282 do k = 1, n - i
283 z(i+k) = beta(p_cur) * z(i+k) + ni(i+k)
284 q(i+k) = beta(p_cur) * q(i+k) + mi(i+k)
285 s(i+k) = beta(p_cur) * s(i+k) + w(i+k)
286 r(i+k) = r(i+k) - alpha(p_cur) * s(i+k)
287 u(i+k,p_cur) = u(i+k,u_prev) - alpha(p_cur) * q(i+k)
288 w(i+k) = w(i+k) - alpha(p_cur) * z(i+k)
289 tmp1 = tmp1 + r(i+k) * coef%mult(i+k,1,1,1) * u(i+k,p_cur)
290 tmp2 = tmp2 + w(i+k) * coef%mult(i+k,1,1,1) * u(i+k,p_cur)
291 tmp3 = tmp3 + r(i+k) * coef%mult(i+k,1,1,1) * r(i+k)
292 end do
293 end if
294 end do
295 !$omp end parallel do
296 reduction(1) = tmp1
297 reduction(2) = tmp2
298 reduction(3) = tmp3
299
300 if (p_cur .eq. pipecg_p_space) then
301 !$omp parallel do private(k, j, p_prev, x_plus)
302 do i = 0, n-1, neko_blk_size
303 if (i + neko_blk_size .le. n) then
304 !$omp simd
305 do k = 1, neko_blk_size
306 x_plus(k) = 0.0_rp
307 end do
308 p_prev = pipecg_p_space+1
309 do j = 1, p_cur
310 !$omp simd
311 do k = 1, neko_blk_size
312 p(i+k) = beta(j) * p(i+k) + u(i+k,p_prev)
313 x_plus(k) = x_plus(k) + alpha(j) * p(i+k)
314 end do
315 p_prev = j
316 end do
317 !$omp simd
318 do k = 1, neko_blk_size
319 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k)
320 u(i+k,pipecg_p_space+1) = u(i+k,pipecg_p_space)
321 end do
322 else
323 do k = 1, n - i
324 x_plus(k) = 0.0_rp
325 end do
326 p_prev = pipecg_p_space+1
327 do j = 1, p_cur
328 do k = 1, n - i
329 p(i+k) = beta(j) * p(i+k) + u(i+k,p_prev)
330 x_plus(k) = x_plus(k) + alpha(j) * p(i+k)
331 end do
332 p_prev = j
333 end do
334 do k = 1, n - i
335 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k)
336 u(i+k,pipecg_p_space+1) = u(i+k,pipecg_p_space)
337 end do
338 end if
339 end do
340 !$omp end parallel do
341 p_prev = p_cur
342 u_prev = pipecg_p_space+1
343 alpha(1) = alpha(p_cur)
344 beta(1) = beta(p_cur)
345 p_cur = 1
346 else
347 u_prev = p_cur
348 p_prev = p_cur
349 p_cur = p_cur + 1
350 end if
351 end do
352
353 if ( p_cur .ne. 1) then
354 !$omp parallel do private(k, j, p_prev, x_plus)
355 do i = 0, n-1, neko_blk_size
356 if (i + neko_blk_size .le. n) then
357 !$omp simd
358 do k = 1, neko_blk_size
359 x_plus(k) = 0.0_rp
360 end do
361 p_prev = pipecg_p_space+1
362 do j = 1, p_cur
363 !$omp simd
364 do k = 1, neko_blk_size
365 p(i+k) = beta(j) * p(i+k) + u(i+k,p_prev)
366 x_plus(k) = x_plus(k) + alpha(j) * p(i+k)
367 end do
368 p_prev = j
369 end do
370 !$omp simd
371 do k = 1, neko_blk_size
372 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k)
373 u(i+k,pipecg_p_space+1) = u(i+k,pipecg_p_space)
374 end do
375 else
376 do k = 1, n - i
377 x_plus(k) = 0.0_rp
378 end do
379 p_prev = pipecg_p_space+1
380 do j = 1, p_cur
381 do k = 1, n - i
382 p(i+k) = beta(j) * p(i+k) + u(i+k,p_prev)
383 x_plus(k) = x_plus(k) + alpha(j) * p(i+k)
384 end do
385 p_prev = j
386 end do
387 do k = 1, n - i
388 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k)
389 u(i+k,pipecg_p_space+1) = u(i+k,pipecg_p_space)
390 end do
391 end if
392 end do
393 !$omp end parallel do
394 end if
395 call this%monitor_stop()
396 ksp_results%res_final = rnorm
397 ksp_results%iter = iter
398 ksp_results%converged = this%is_converged(iter, rnorm)
399
400 end associate
401
402 end function pipecg_solve
403
405 function pipecg_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
406 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
407 class(pipecg_t), intent(inout) :: this
408 class(ax_t), intent(in) :: ax
409 type(field_t), intent(inout) :: x
410 type(field_t), intent(inout) :: y
411 type(field_t), intent(inout) :: z
412 integer, intent(in) :: n
413 real(kind=rp), dimension(n), intent(in) :: fx
414 real(kind=rp), dimension(n), intent(in) :: fy
415 real(kind=rp), dimension(n), intent(in) :: fz
416 type(coef_t), intent(inout) :: coef
417 type(bc_list_t), intent(inout) :: blstx
418 type(bc_list_t), intent(inout) :: blsty
419 type(bc_list_t), intent(inout) :: blstz
420 type(gs_t), intent(inout) :: gs_h
421 type(ksp_monitor_t), dimension(3) :: ksp_results
422 integer, optional, intent(in) :: niter
423
424 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
425 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
426 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
427
428 end function pipecg_solve_coupled
429
430end module pipecg
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:52
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:44
Defines a field.
Definition field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition krylov.f90:51
Definition math.f90:60
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1285
Build configurations.
integer, parameter neko_blk_size
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a pipelined Conjugate Gradient methods.
Definition pipecg.f90:34
subroutine pipecg_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a pipelined PCG solver.
Definition pipecg.f90:79
subroutine pipecg_free(this)
Deallocate a pipelined PCG solver.
Definition pipecg.f90:124
integer, parameter pipecg_p_space
Definition pipecg.f90:51
type(ksp_monitor_t) function pipecg_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
Pipelined PCG solve.
Definition pipecg.f90:163
type(ksp_monitor_t) function, dimension(3) pipecg_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Pipelined PCG coupled solve.
Definition pipecg.f90:407
Krylov preconditioner.
Definition precon.f90:34
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:49
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:63
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:73
Pipelined preconditioned conjugate gradient method.
Definition pipecg.f90:54
Defines a canonical Krylov preconditioner.
Definition precon.f90:40