Neko  0.8.1
A portable framework for high-order spectral element flow simulations
pipecg.f90
Go to the documentation of this file.
1 ! Copyright (c) 2021-2024, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34 module pipecg
36  use precon, only : pc_t
37  use ax_product, only : ax_t
38  use num_types, only: rp
39  use field, only : field_t
40  use coefs, only : coef_t
41  use gather_scatter, only : gs_t, gs_op_add
42  use bc, only : bc_list_t, bc_list_apply
43  use math, only : glsc3, rzero, copy, abscmp
44  use comm
45  implicit none
46  private
47 
48  integer, parameter :: pipecg_p_space = 7
49 
51  type, public, extends(ksp_t) :: pipecg_t
52  real(kind=rp), allocatable :: p(:)
53  real(kind=rp), allocatable :: q(:)
54  real(kind=rp), allocatable :: r(:)
55  real(kind=rp), allocatable :: s(:)
56  real(kind=rp), allocatable :: u(:,:)
57  real(kind=rp), allocatable :: w(:)
58  real(kind=rp), allocatable :: z(:)
59  real(kind=rp), allocatable :: mi(:)
60  real(kind=rp), allocatable :: ni(:)
61  contains
63  procedure, pass(this) :: init => pipecg_init
65  procedure, pass(this) :: free => pipecg_free
67  procedure, pass(this) :: solve => pipecg_solve
68  end type pipecg_t
69 
70 contains
71 
73  subroutine pipecg_init(this, n, max_iter, M, rel_tol, abs_tol)
74  class(pipecg_t), intent(inout) :: this
75  integer, intent(in) :: max_iter
76  class(pc_t), optional, intent(inout), target :: M
77  integer, intent(in) :: n
78  real(kind=rp), optional, intent(inout) :: rel_tol
79  real(kind=rp), optional, intent(inout) :: abs_tol
80 
81  call this%free()
82 
83  allocate(this%p(n))
84  allocate(this%q(n))
85  allocate(this%r(n))
86  allocate(this%s(n))
87  allocate(this%u(n,pipecg_p_space+1))
88  allocate(this%w(n))
89  allocate(this%z(n))
90  allocate(this%mi(n))
91  allocate(this%ni(n))
92  if (present(m)) then
93  this%M => m
94  end if
95 
96  if (present(rel_tol) .and. present(abs_tol)) then
97  call this%ksp_init(max_iter, rel_tol, abs_tol)
98  else if (present(rel_tol)) then
99  call this%ksp_init(max_iter, rel_tol=rel_tol)
100  else if (present(abs_tol)) then
101  call this%ksp_init(max_iter, abs_tol=abs_tol)
102  else
103  call this%ksp_init(max_iter)
104  end if
105 
106  end subroutine pipecg_init
107 
109  subroutine pipecg_free(this)
110  class(pipecg_t), intent(inout) :: this
111 
112  call this%ksp_free()
113 
114  if (allocated(this%p)) then
115  deallocate(this%p)
116  end if
117  if (allocated(this%q)) then
118  deallocate(this%q)
119  end if
120  if (allocated(this%r)) then
121  deallocate(this%r)
122  end if
123  if (allocated(this%s)) then
124  deallocate(this%s)
125  end if
126  if (allocated(this%u)) then
127  deallocate(this%u)
128  end if
129  if (allocated(this%w)) then
130  deallocate(this%w)
131  end if
132  if (allocated(this%z)) then
133  deallocate(this%z)
134  end if
135  if (allocated(this%mi)) then
136  deallocate(this%mi)
137  end if
138  if (allocated(this%ni)) then
139  deallocate(this%ni)
140  end if
141 
142  nullify(this%M)
143 
144 
145  end subroutine pipecg_free
146 
148  function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
149  class(pipecg_t), intent(inout) :: this
150  class(ax_t), intent(inout) :: ax
151  type(field_t), intent(inout) :: x
152  integer, intent(in) :: n
153  real(kind=rp), dimension(n), intent(inout) :: f
154  type(coef_t), intent(inout) :: coef
155  type(bc_list_t), intent(inout) :: blst
156  type(gs_t), intent(inout) :: gs_h
157  type(ksp_monitor_t) :: ksp_results
158  integer, optional, intent(in) :: niter
159  integer :: iter, max_iter, i, j, k, ierr, p_cur, p_prev, u_prev
160  real(kind=rp) :: rnorm, rtr, reduction(3), norm_fac
161  real(kind=rp) :: alpha(pipecg_p_space), beta(pipecg_p_space)
162  real(kind=rp) :: gamma1, gamma2, delta
163  real(kind=rp) :: tmp1, tmp2, tmp3, x_plus(neko_blk_size)
164  type(mpi_request) :: request
165  type(mpi_status) :: status
166 
167  if (present(niter)) then
168  max_iter = niter
169  else
170  max_iter = this%max_iter
171  end if
172  norm_fac = 1.0_rp / sqrt(coef%volume)
173 
174  associate(p => this%p, q => this%q, r => this%r, s => this%s, &
175  u => this%u, w => this%w, z => this%z, mi => this%mi, ni => this%ni)
176 
177  p_prev = pipecg_p_space
178  u_prev = pipecg_p_space+1
179  p_cur = 1
180  call rzero(x%x, n)
181  call rzero(z, n)
182  call rzero(q, n)
183  call rzero(p, n)
184  call rzero(s, n)
185  call copy(r, f, n)
186  call this%M%solve(u(1,u_prev), r, n)
187  call ax%compute(w, u(1,u_prev), coef, x%msh, x%Xh)
188  call gs_h%op(w, n, gs_op_add)
189  call bc_list_apply(blst, w, n)
190 
191  rtr = glsc3(r, coef%mult, r, n)
192  rnorm = sqrt(rtr)*norm_fac
193  ksp_results%res_start = rnorm
194  ksp_results%res_final = rnorm
195  ksp_results%iter = 0
196  if(abscmp(rnorm, 0.0_rp)) return
197 
198  gamma1 = 0.0_rp
199  tmp1 = 0.0_rp
200  tmp2 = 0.0_rp
201  tmp3 = 0.0_rp
202  do i = 1, n
203  tmp1 = tmp1 + r(i) * coef%mult(i,1,1,1) * u(i,u_prev)
204  tmp2 = tmp2 + w(i) * coef%mult(i,1,1,1) * u(i,u_prev)
205  tmp3 = tmp3 + r(i) * coef%mult(i,1,1,1) * r(i)
206  end do
207  reduction(1) = tmp1
208  reduction(2) = tmp2
209  reduction(3) = tmp3
210 
211  do iter = 1, max_iter
212  call mpi_iallreduce(mpi_in_place, reduction, 3, &
213  mpi_real_precision, mpi_sum, neko_comm, request, ierr)
214 
215  call this%M%solve(mi, w, n)
216  call ax%compute(ni, mi, coef, x%msh, x%Xh)
217  call gs_h%op(ni, n, gs_op_add)
218  call bc_list_apply(blst, ni, n)
219 
220  call mpi_wait(request, status, ierr)
221  gamma2 = gamma1
222  gamma1 = reduction(1)
223  delta = reduction(2)
224  rtr = reduction(3)
225 
226  rnorm = sqrt(rtr)*norm_fac
227  if (rnorm .lt. this%abs_tol) exit
228 
229  if (iter .gt. 1) then
230  beta(p_cur) = gamma1 / gamma2
231  alpha(p_cur) = gamma1 / (delta - (beta(p_cur) * gamma1/alpha(p_prev)))
232  else
233  beta(p_cur) = 0.0_rp
234  alpha(p_cur) = gamma1/delta
235  end if
236 
237  tmp1 = 0.0_rp
238  tmp2 = 0.0_rp
239  tmp3 = 0.0_rp
240  do i = 0, n, neko_blk_size
241  if (i + neko_blk_size .le. n) then
242  do k = 1, neko_blk_size
243  z(i+k) = beta(p_cur) * z(i+k) + ni(i+k)
244  q(i+k) = beta(p_cur) * q(i+k) + mi(i+k)
245  s(i+k) = beta(p_cur) * s(i+k) + w(i+k)
246  r(i+k) = r(i+k) - alpha(p_cur) * s(i+k)
247  u(i+k,p_cur) = u(i+k,u_prev) - alpha(p_cur) * q(i+k)
248  w(i+k) = w(i+k) - alpha(p_cur) * z(i+k)
249  tmp1 = tmp1 + r(i+k) * coef%mult(i+k,1,1,1) * u(i+k,p_cur)
250  tmp2 = tmp2 + w(i+k) * coef%mult(i+k,1,1,1) * u(i+k,p_cur)
251  tmp3 = tmp3 + r(i+k) * coef%mult(i+k,1,1,1) * r(i+k)
252  end do
253  else
254  do k = 1, n-i
255  z(i+k) = beta(p_cur) * z(i+k) + ni(i+k)
256  q(i+k) = beta(p_cur) * q(i+k) + mi(i+k)
257  s(i+k) = beta(p_cur) * s(i+k) + w(i+k)
258  r(i+k) = r(i+k) - alpha(p_cur) * s(i+k)
259  u(i+k,p_cur) = u(i+k,u_prev) - alpha(p_cur) * q(i+k)
260  w(i+k) = w(i+k) - alpha(p_cur) * z(i+k)
261  tmp1 = tmp1 + r(i+k) * coef%mult(i+k,1,1,1) * u(i+k,p_cur)
262  tmp2 = tmp2 + w(i+k) * coef%mult(i+k,1,1,1) * u(i+k,p_cur)
263  tmp3 = tmp3 + r(i+k) * coef%mult(i+k,1,1,1) * r(i+k)
264  end do
265  end if
266  end do
267 
268  reduction(1) = tmp1
269  reduction(2) = tmp2
270  reduction(3) = tmp3
271 
272  if (p_cur .eq. pipecg_p_space) then
273  do i = 0, n, neko_blk_size
274  if (i + neko_blk_size .le. n) then
275  do k = 1, neko_blk_size
276  x_plus(k) = 0.0_rp
277  end do
278  p_prev = pipecg_p_space+1
279  do j = 1, p_cur
280  do k = 1, neko_blk_size
281  p(i+k) = beta(j) * p(i+k) + u(i+k,p_prev)
282  x_plus(k) = x_plus(k) + alpha(j) * p(i+k)
283  end do
284  p_prev = j
285  end do
286  do k = 1, neko_blk_size
287  x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k)
288  u(i+k,pipecg_p_space+1) = u(i+k,pipecg_p_space)
289  end do
290  else
291  do k = 1, n-i
292  x_plus(1) = 0.0_rp
293  p_prev = pipecg_p_space + 1
294  do j = 1, p_cur
295  p(i+k) = beta(j) * p(i+k) + u(i+k,p_prev)
296  x_plus(1) = x_plus(1) + alpha(j) * p(i+k)
297  p_prev = j
298  end do
299  x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(1)
300  u(i+k,pipecg_p_space+1) = u(i+k,pipecg_p_space)
301  end do
302  end if
303  end do
304  p_prev = p_cur
305  u_prev = pipecg_p_space+1
306  alpha(1) = alpha(p_cur)
307  beta(1) = beta(p_cur)
308  p_cur = 1
309  else
310  u_prev = p_cur
311  p_prev = p_cur
312  p_cur = p_cur + 1
313  end if
314  end do
315 
316  if ( p_cur .ne. 1) then
317  do i = 0, n, neko_blk_size
318  if (i + neko_blk_size .le. n) then
319  do k = 1, neko_blk_size
320  x_plus(k) = 0.0_rp
321  end do
322  p_prev = pipecg_p_space+1
323  do j = 1, p_cur
324  do k = 1, neko_blk_size
325  p(i+k) = beta(j) * p(i+k) + u(i+k,p_prev)
326  x_plus(k) = x_plus(k) + alpha(j) * p(i+k)
327  end do
328  p_prev = j
329  end do
330  do k = 1, neko_blk_size
331  x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k)
332  u(i+k,pipecg_p_space+1) = u(i+k,pipecg_p_space)
333  end do
334  else
335  do k = 1, n-i
336  x_plus(1) = 0.0_rp
337  p_prev = pipecg_p_space + 1
338  do j = 1, p_cur
339  p(i+k) = beta(j) * p(i+k) + u(i+k,p_prev)
340  x_plus(1) = x_plus(1) + alpha(j) * p(i+k)
341  p_prev = j
342  end do
343  x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(1)
344  u(i+k,pipecg_p_space+1) = u(i+k,pipecg_p_space)
345  end do
346  end if
347  end do
348  end if
349 
350  ksp_results%res_final = rnorm
351  ksp_results%iter = iter
352 
353  end associate
354 
355  end function pipecg_solve
356 
357 end module pipecg
358 
359 
Defines a Matrix-vector product.
Definition: ax.f90:34
Defines a boundary condition.
Definition: bc.f90:34
Coefficients.
Definition: coef.f90:34
Definition: comm.F90:1
type(mpi_comm) neko_comm
MPI communicator.
Definition: comm.F90:16
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
Definition: comm.F90:22
Defines a field.
Definition: field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition: krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition: krylov.f90:50
Definition: math.f90:60
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition: math.f90:810
subroutine, public copy(a, b, n)
Copy a vector .
Definition: math.f90:211
subroutine, public rzero(a, n)
Zero a real vector.
Definition: math.f90:167
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Defines a pipelined Conjugate Gradient methods.
Definition: pipecg.f90:34
subroutine pipecg_free(this)
Deallocate a pipelined PCG solver.
Definition: pipecg.f90:110
integer, parameter pipecg_p_space
Definition: pipecg.f90:48
type(ksp_monitor_t) function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter)
Pipelined PCG solve.
Definition: pipecg.f90:149
subroutine pipecg_init(this, n, max_iter, M, rel_tol, abs_tol)
Initialise a pipelined PCG solver.
Definition: pipecg.f90:74
Krylov preconditioner.
Definition: precon.f90:34
Base type for a matrix-vector product providing .
Definition: ax.f90:43
A list of boundary conditions.
Definition: bc.f90:102
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition: coef.f90:54
Type for storing initial and final residuals in a Krylov solver.
Definition: krylov.f90:55
Base abstract type for a canonical Krylov method, solving .
Definition: krylov.f90:65
Pipelined preconditioned conjugate gradient method.
Definition: pipecg.f90:51
Defines a canonical Krylov preconditioner.
Definition: precon.f90:40