Neko  0.9.99
A portable framework for high-order spectral element flow simulations
cg.f90
Go to the documentation of this file.
1 ! Copyright (c) 2020-2024, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34 module cg
35  use num_types, only: rp, xp
37  use precon, only : pc_t
38  use ax_product, only : ax_t
39  use field, only : field_t
40  use coefs, only : coef_t
41  use gather_scatter, only : gs_t, gs_op_add
42  use bc, only : bc_list_t, bc_list_apply
43  use math, only : glsc3, rzero, copy, abscmp
44  use comm
45  implicit none
46  private
47 
48  integer, parameter :: cg_p_space = 7
49 
51  type, public, extends(ksp_t) :: cg_t
52  real(kind=rp), allocatable :: w(:)
53  real(kind=rp), allocatable :: r(:)
54  real(kind=rp), allocatable :: p(:,:)
55  real(kind=rp), allocatable :: z(:)
56  real(kind=rp), allocatable :: alpha(:)
57  contains
58  procedure, pass(this) :: init => cg_init
59  procedure, pass(this) :: free => cg_free
60  procedure, pass(this) :: solve => cg_solve
61  procedure, pass(this) :: solve_coupled => cg_solve_coupled
62  end type cg_t
63 
64 contains
65 
67  subroutine cg_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
68  class(cg_t), intent(inout), target :: this
69  integer, intent(in) :: max_iter
70  class(pc_t), optional, intent(in), target :: M
71  integer, intent(in) :: n
72  real(kind=rp), optional, intent(in) :: rel_tol
73  real(kind=rp), optional, intent(in) :: abs_tol
74  logical, optional, intent(in) :: monitor
75 
76  call this%free()
77 
78  allocate(this%w(n))
79  allocate(this%r(n))
80  allocate(this%p(n,cg_p_space))
81  allocate(this%z(n))
82  allocate(this%alpha(cg_p_space))
83 
84  if (present(m)) then
85  this%M => m
86  end if
87  if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
88  call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
89  else if (present(rel_tol) .and. present(abs_tol)) then
90  call this%ksp_init(max_iter, rel_tol, abs_tol)
91  else if (present(monitor) .and. present(abs_tol)) then
92  call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
93  else if (present(rel_tol) .and. present(monitor)) then
94  call this%ksp_init(max_iter, rel_tol, monitor = monitor)
95  else if (present(rel_tol)) then
96  call this%ksp_init(max_iter, rel_tol = rel_tol)
97  else if (present(abs_tol)) then
98  call this%ksp_init(max_iter, abs_tol = abs_tol)
99  else if (present(monitor)) then
100  call this%ksp_init(max_iter, monitor = monitor)
101  else
102  call this%ksp_init(max_iter)
103  end if
104 
105  end subroutine cg_init
106 
108  subroutine cg_free(this)
109  class(cg_t), intent(inout) :: this
110 
111  call this%ksp_free()
112 
113  if (allocated(this%w)) then
114  deallocate(this%w)
115  end if
116 
117  if (allocated(this%r)) then
118  deallocate(this%r)
119  end if
120 
121  if (allocated(this%p)) then
122  deallocate(this%p)
123  end if
124 
125  if (allocated(this%z)) then
126  deallocate(this%z)
127  end if
128 
129  if (allocated(this%alpha)) then
130  deallocate(this%alpha)
131  end if
132 
133  nullify(this%M)
134 
135  end subroutine cg_free
136 
138  function cg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
139  class(cg_t), intent(inout) :: this
140  class(ax_t), intent(in) :: ax
141  type(field_t), intent(inout) :: x
142  integer, intent(in) :: n
143  real(kind=rp), dimension(n), intent(in) :: f
144  type(coef_t), intent(inout) :: coef
145  type(bc_list_t), intent(in) :: blst
146  type(gs_t), intent(inout) :: gs_h
147  type(ksp_monitor_t) :: ksp_results
148  integer, optional, intent(in) :: niter
149  integer :: iter, max_iter, i, j, k, p_cur, p_prev
150  real(kind=rp) :: rnorm, rtr, rtz2, rtz1, x_plus(neko_blk_size)
151  real(kind=rp) :: beta, pap, norm_fac
152 
153  if (present(niter)) then
154  max_iter = niter
155  else
156  max_iter = this%max_iter
157  end if
158  norm_fac = 1.0_rp / sqrt(coef%volume)
159 
160  associate(w => this%w, r => this%r, p => this%p, &
161  z => this%z, alpha => this%alpha)
162 
163  rtz1 = 1.0_rp
164  call rzero(x%x, n)
165  call rzero(p(1,cg_p_space), n)
166  call copy(r, f, n)
167 
168  rtr = glsc3(r, coef%mult, r, n)
169  rnorm = sqrt(rtr) * norm_fac
170  ksp_results%res_start = rnorm
171  ksp_results%res_final = rnorm
172  ksp_results%iter = 0
173  p_prev = cg_p_space
174  p_cur = 1
175  if(abscmp(rnorm, 0.0_rp)) return
176  call this%monitor_start('CG')
177  do iter = 1, max_iter
178  call this%M%solve(z, r, n)
179  rtz2 = rtz1
180  rtz1 = glsc3(r, coef%mult, z, n)
181 
182  beta = rtz1 / rtz2
183  if (iter .eq. 1) beta = 0.0_rp
184  do i = 1, n
185  p(i,p_cur) = z(i) + beta * p(i,p_prev)
186  end do
187 
188  call ax%compute(w, p(1,p_cur), coef, x%msh, x%Xh)
189  call gs_h%op(w, n, gs_op_add)
190  call bc_list_apply(blst, w, n)
191 
192  pap = glsc3(w, coef%mult, p(1,p_cur), n)
193 
194  alpha(p_cur) = rtz1 / pap
195  call second_cg_part(rtr, r, coef%mult, w, alpha(p_cur), n)
196  rnorm = sqrt(rtr) * norm_fac
197  call this%monitor_iter(iter, rnorm)
198 
199  if ((p_cur .eq. cg_p_space) .or. &
200  (rnorm .lt. this%abs_tol) .or. iter .eq. max_iter) then
201  do i = 0, n, neko_blk_size
202  if (i + neko_blk_size .le. n) then
203  do k = 1, neko_blk_size
204  x_plus(k) = 0.0_rp
205  end do
206  do j = 1, p_cur
207  do k = 1, neko_blk_size
208  x_plus(k) = x_plus(k) + alpha(j) * p(i+k,j)
209  end do
210  end do
211  do k = 1, neko_blk_size
212  x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k)
213  end do
214  else
215  do k = 1, n-i
216  x_plus(1) = 0.0_rp
217  do j = 1, p_cur
218  x_plus(1) = x_plus(1) + alpha(j) * p(i+k,j)
219  end do
220  x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(1)
221  end do
222  end if
223  end do
224  p_prev = p_cur
225  p_cur = 1
226  if (rnorm .lt. this%abs_tol) exit
227  else
228  p_prev = p_cur
229  p_cur = p_cur + 1
230  end if
231  end do
232  end associate
233  call this%monitor_stop()
234  ksp_results%res_final = rnorm
235  ksp_results%iter = iter
236 
237  end function cg_solve
238 
239  subroutine second_cg_part(rtr, r, mult, w, alpha, n)
240  integer, intent(in) :: n
241  real(kind=rp), intent(inout) :: r(n), rtr
242  real(kind=xp) :: tmp
243  real(kind=rp), intent(in) ::mult(n), w(n), alpha
244  integer :: i, ierr
245 
246  tmp = 0.0_xp
247  do i = 1, n
248  r(i) = r(i) - alpha*w(i)
249  tmp = tmp + r(i) * r(i) * mult(i)
250  end do
251  call mpi_allreduce(mpi_in_place, tmp, 1, &
252  mpi_extra_precision, mpi_sum, neko_comm, ierr)
253  rtr = tmp
254 
255  end subroutine second_cg_part
256 
258  function cg_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
259  n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
260  class(cg_t), intent(inout) :: this
261  class(ax_t), intent(in) :: ax
262  type(field_t), intent(inout) :: x
263  type(field_t), intent(inout) :: y
264  type(field_t), intent(inout) :: z
265  integer, intent(in) :: n
266  real(kind=rp), dimension(n), intent(in) :: fx
267  real(kind=rp), dimension(n), intent(in) :: fy
268  real(kind=rp), dimension(n), intent(in) :: fz
269  type(coef_t), intent(inout) :: coef
270  type(bc_list_t), intent(in) :: blstx
271  type(bc_list_t), intent(in) :: blsty
272  type(bc_list_t), intent(in) :: blstz
273  type(gs_t), intent(inout) :: gs_h
274  type(ksp_monitor_t), dimension(3) :: ksp_results
275  integer, optional, intent(in) :: niter
276 
277  ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
278  ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
279  ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
280 
281  end function cg_solve_coupled
282 
283 end module cg
284 
285 
Defines a Matrix-vector product.
Definition: ax.f90:34
Defines a boundary condition.
Definition: bc.f90:34
Defines various Conjugate Gradient methods.
Definition: cg.f90:34
subroutine cg_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
Initialise a standard PCG solver.
Definition: cg.f90:68
subroutine cg_free(this)
Deallocate a standard PCG solver.
Definition: cg.f90:109
type(ksp_monitor_t) function, dimension(3) cg_solve_coupled(this, Ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard PCG coupled solve.
Definition: cg.f90:260
integer, parameter cg_p_space
Definition: cg.f90:48
type(ksp_monitor_t) function cg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter)
Standard PCG solve.
Definition: cg.f90:139
subroutine second_cg_part(rtr, r, mult, w, alpha, n)
Definition: cg.f90:240
Coefficients.
Definition: coef.f90:34
Definition: comm.F90:1
Defines a field.
Definition: field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition: krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition: krylov.f90:51
Definition: math.f90:60
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition: math.f90:895
subroutine, public copy(a, b, n)
Copy a vector .
Definition: math.f90:239
subroutine, public rzero(a, n)
Zero a real vector.
Definition: math.f90:195
integer, parameter, public xp
Definition: num_types.f90:14
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Krylov preconditioner.
Definition: precon.f90:34
Base type for a matrix-vector product providing .
Definition: ax.f90:43
A list of boundary conditions.
Definition: bc.f90:104
Standard preconditioned conjugate gradient method.
Definition: cg.f90:51
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition: coef.f90:55
Type for storing initial and final residuals in a Krylov solver.
Definition: krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition: krylov.f90:66
Defines a canonical Krylov preconditioner.
Definition: precon.f90:40