Neko  0.9.99
A portable framework for high-order spectral element flow simulations
cg_coupled.f90
Go to the documentation of this file.
1 ! Copyright (c) 2024, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34 module cg_cpld
35  use num_types, only: rp
37  use precon, only : pc_t
38  use ax_product, only : ax_t
39  use field, only : field_t
40  use coefs, only : coef_t
41  use gather_scatter, only : gs_t, gs_op_add
42  use bc, only : bc_list_t, bc_list_apply
43  use math, only : glsc3, glsc2
44  use utils, only : neko_error
45  implicit none
46  private
47 
49  type, public, extends(ksp_t) :: cg_cpld_t
50  real(kind=rp), allocatable :: w1(:)
51  real(kind=rp), allocatable :: w2(:)
52  real(kind=rp), allocatable :: w3(:)
53  real(kind=rp), allocatable :: r1(:)
54  real(kind=rp), allocatable :: r2(:)
55  real(kind=rp), allocatable :: r3(:)
56  real(kind=rp), allocatable :: p1(:)
57  real(kind=rp), allocatable :: p2(:)
58  real(kind=rp), allocatable :: p3(:)
59  real(kind=rp), allocatable :: z1(:)
60  real(kind=rp), allocatable :: z2(:)
61  real(kind=rp), allocatable :: z3(:)
62  real(kind=rp), allocatable :: tmp(:)
63  contains
64  procedure, pass(this) :: init => cg_cpld_init
65  procedure, pass(this) :: free => cg_cpld_free
66  procedure, pass(this) :: solve => cg_cpld_nop
67  procedure, pass(this) :: solve_coupled => cg_cpld_solve
68  end type cg_cpld_t
69 
70 contains
71 
73  subroutine cg_cpld_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
74  class(cg_cpld_t), intent(inout) :: this
75  integer, intent(in) :: max_iter
76  class(pc_t), optional, intent(in), target :: M
77  integer, intent(in) :: n
78  real(kind=rp), optional, intent(in) :: rel_tol
79  real(kind=rp), optional, intent(in) :: abs_tol
80  logical, optional, intent(in) :: monitor
81 
82  call this%free()
83 
84  allocate(this%w1(n))
85  allocate(this%w2(n))
86  allocate(this%w3(n))
87  allocate(this%r1(n))
88  allocate(this%r2(n))
89  allocate(this%r3(n))
90  allocate(this%p1(n))
91  allocate(this%p2(n))
92  allocate(this%p3(n))
93  allocate(this%z1(n))
94  allocate(this%z2(n))
95  allocate(this%z3(n))
96  allocate(this%tmp(n))
97 
98  if (present(m)) then
99  this%M => m
100  end if
101 
102  if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
103  call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
104  else if (present(rel_tol) .and. present(abs_tol)) then
105  call this%ksp_init(max_iter, rel_tol, abs_tol)
106  else if (present(monitor) .and. present(abs_tol)) then
107  call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
108  else if (present(rel_tol) .and. present(monitor)) then
109  call this%ksp_init(max_iter, rel_tol, monitor = monitor)
110  else if (present(rel_tol)) then
111  call this%ksp_init(max_iter, rel_tol = rel_tol)
112  else if (present(abs_tol)) then
113  call this%ksp_init(max_iter, abs_tol = abs_tol)
114  else if (present(monitor)) then
115  call this%ksp_init(max_iter, monitor = monitor)
116  else
117  call this%ksp_init(max_iter)
118  end if
119 
120  end subroutine cg_cpld_init
121 
123  subroutine cg_cpld_free(this)
124  class(cg_cpld_t), intent(inout) :: this
125 
126  call this%ksp_free()
127 
128  if (allocated(this%w1)) then
129  deallocate(this%w1)
130  end if
131 
132  if (allocated(this%w2)) then
133  deallocate(this%w2)
134  end if
135 
136  if (allocated(this%w3)) then
137  deallocate(this%w3)
138  end if
139 
140  if (allocated(this%r1)) then
141  deallocate(this%r1)
142  end if
143 
144  if (allocated(this%r2)) then
145  deallocate(this%r2)
146  end if
147 
148  if (allocated(this%r3)) then
149  deallocate(this%r3)
150  end if
151 
152  if (allocated(this%p1)) then
153  deallocate(this%p1)
154  end if
155 
156  if (allocated(this%p2)) then
157  deallocate(this%p2)
158  end if
159 
160  if (allocated(this%p3)) then
161  deallocate(this%p3)
162  end if
163 
164  if (allocated(this%z1)) then
165  deallocate(this%z1)
166  end if
167 
168  if (allocated(this%z2)) then
169  deallocate(this%z2)
170  end if
171 
172  if (allocated(this%z3)) then
173  deallocate(this%z3)
174  end if
175 
176  if (allocated(this%tmp)) then
177  deallocate(this%tmp)
178  end if
179 
180  nullify(this%M)
181 
182  end subroutine cg_cpld_free
183 
184  function cg_cpld_nop(this, Ax, x, f, n, coef, blst, gs_h, niter) &
185  result(ksp_results)
186  class(cg_cpld_t), intent(inout) :: this
187  class(ax_t), intent(in) :: ax
188  type(field_t), intent(inout) :: x
189  integer, intent(in) :: n
190  real(kind=rp), dimension(n), intent(in) :: f
191  type(coef_t), intent(inout) :: coef
192  type(bc_list_t), intent(in) :: blst
193  type(gs_t), intent(inout) :: gs_h
194  type(ksp_monitor_t) :: ksp_results
195  integer, optional, intent(in) :: niter
196 
197  ! Throw and error
198  call neko_error('The cpldcg solver is only defined for coupled solves')
199 
200  ksp_results%res_final = 0.0
201  ksp_results%iter = 0
202  end function cg_cpld_nop
203 
205  function cg_cpld_solve(this, Ax, x, y, z, fx, fy, fz, &
206  n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
207  class(cg_cpld_t), intent(inout) :: this
208  class(ax_t), intent(in) :: ax
209  type(field_t), intent(inout) :: x
210  type(field_t), intent(inout) :: y
211  type(field_t), intent(inout) :: z
212  integer, intent(in) :: n
213  real(kind=rp), dimension(n), intent(in) :: fx
214  real(kind=rp), dimension(n), intent(in) :: fy
215  real(kind=rp), dimension(n), intent(in) :: fz
216  type(coef_t), intent(inout) :: coef
217  type(bc_list_t), intent(in) :: blstx
218  type(bc_list_t), intent(in) :: blsty
219  type(bc_list_t), intent(in) :: blstz
220  type(gs_t), intent(inout) :: gs_h
221  type(ksp_monitor_t), dimension(3) :: ksp_results
222  integer, optional, intent(in) :: niter
223  real(kind=rp), parameter :: one = 1.0
224  real(kind=rp), parameter :: zero = 0.0
225  integer :: i, iter, max_iter
226  real(kind=rp) :: rnorm, rtr, rtr0, rtz2, rtz1
227  real(kind=rp) :: beta, pap, alpha, alphm, norm_fac
228 
229  if (present(niter)) then
230  max_iter = niter
231  else
232  max_iter = this%max_iter
233  end if
234  norm_fac = one / coef%volume
235 
236  associate(p1 => this%p1, p2 => this%p2, p3 => this%p3, z1 => this%z1, &
237  z2 => this%z2, z3 => this%z3, r1 => this%r1, r2 => this%r2, &
238  r3 => this%r3, tmp => this%tmp, w1 => this%w1, w2 => this%w2, &
239  w3 => this%w3)
240 
241  rtz1 = one
242  do concurrent(i = 1:n)
243  x%x(i,1,1,1) = 0.0_rp
244  y%x(i,1,1,1) = 0.0_rp
245  z%x(i,1,1,1) = 0.0_rp
246  p1(i) = 0.0_rp
247  p2(i) = 0.0_rp
248  p3(i) = 0.0_rp
249  z1(i) = 0.0_rp
250  z2(i) = 0.0_rp
251  z3(i) = 0.0_rp
252  r1(i) = fx(i)
253  r2(i) = fy(i)
254  r3(i) = fz(i)
255  tmp(i) = r1(i)**2 + r2(i)**2 + r3(i)**2
256  end do
257 
258  rtr = glsc3(tmp, coef%mult, coef%binv, n)
259  rnorm = sqrt(rtr*norm_fac)
260  ksp_results%res_start = rnorm
261  ksp_results%res_final = rnorm
262  ksp_results%iter = 0
263  if (rnorm .eq. zero) return
264 
265  call this%monitor_start('cpldCG')
266  do iter = 1, max_iter
267  call this%M%solve(z1, this%r1, n)
268  call this%M%solve(z2, this%r2, n)
269  call this%M%solve(z3, this%r3, n)
270  rtz2 = rtz1
271 
272  do concurrent(i = 1:n)
273  this%tmp(i) = z1(i) * r1(i) &
274  + z2(i) * r2(i) &
275  + z3(i) * r3(i)
276  end do
277 
278  rtz1 = glsc2(tmp, coef%mult, n)
279 
280  beta = rtz1 / rtz2
281  if (iter .eq. 1) beta = zero
282  do concurrent(i = 1:n)
283  p1(i) = p1(i) * beta + z1(i)
284  p2(i) = p2(i) * beta + z2(i)
285  p3(i) = p3(i) * beta + z3(i)
286  end do
287 
288  call ax%compute_vector(w1, w2, w3, p1, p2, p3, coef, x%msh, x%Xh)
289  call gs_h%op(w1, n, gs_op_add)
290  call gs_h%op(w2, n, gs_op_add)
291  call gs_h%op(w3, n, gs_op_add)
292  call bc_list_apply(blstx, w1, n)
293  call bc_list_apply(blsty, w2, n)
294  call bc_list_apply(blstz, w3, n)
295 
296  do concurrent(i = 1:n)
297  tmp(i) = w1(i) * p1(i) &
298  + w2(i) * p2(i) &
299  + w3(i) * p3(i)
300  end do
301 
302  pap = glsc2(tmp, coef%mult, n)
303 
304  alpha = rtz1 / pap
305  alphm = -alpha
306  do concurrent(i = 1:n)
307  x%x(i,1,1,1) = x%x(i,1,1,1) + alpha * p1(i)
308  y%x(i,1,1,1) = y%x(i,1,1,1) + alpha * p2(i)
309  z%x(i,1,1,1) = z%x(i,1,1,1) + alpha * p3(i)
310  r1(i) = r1(i) + alphm * w1(i)
311  r2(i) = r2(i) + alphm * w2(i)
312  r3(i) = r3(i) + alphm * w3(i)
313  tmp(i) = r1(i)**2 + r2(i)**2 + r3(i)**2
314  end do
315 
316  rtr = glsc3(tmp, coef%mult, coef%binv, n)
317  if (iter .eq. 1) rtr0 = rtr
318  rnorm = sqrt(rtr * norm_fac)
319  call this%monitor_iter(iter, rnorm)
320  if (rnorm .lt. this%abs_tol) then
321  exit
322  end if
323  end do
324  end associate
325  call this%monitor_stop()
326  ksp_results%res_final = rnorm
327  ksp_results%iter = iter
328  end function cg_cpld_solve
329 
330 end module cg_cpld
Defines a Matrix-vector product.
Definition: ax.f90:34
Defines a boundary condition.
Definition: bc.f90:34
Defines a coupled Conjugate Gradient methods.
Definition: cg_coupled.f90:34
type(ksp_monitor_t) function, dimension(3) cg_cpld_solve(this, Ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Coupled PCG solve.
Definition: cg_coupled.f90:207
subroutine cg_cpld_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
Initialise a coupled PCG solver.
Definition: cg_coupled.f90:74
type(ksp_monitor_t) function cg_cpld_nop(this, Ax, x, f, n, coef, blst, gs_h, niter)
Definition: cg_coupled.f90:186
subroutine cg_cpld_free(this)
Deallocate a coupled PCG solver.
Definition: cg_coupled.f90:124
Coefficients.
Definition: coef.f90:34
Defines a field.
Definition: field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition: krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition: krylov.f90:51
Definition: math.f90:60
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition: math.f90:895
real(kind=rp) function, public glsc2(a, b, n)
Weighted inner product .
Definition: math.f90:876
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Krylov preconditioner.
Definition: precon.f90:34
Utilities.
Definition: utils.f90:35
Base type for a matrix-vector product providing .
Definition: ax.f90:43
A list of boundary conditions.
Definition: bc.f90:104
Coupled preconditioned conjugate gradient method.
Definition: cg_coupled.f90:49
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition: coef.f90:55
Type for storing initial and final residuals in a Krylov solver.
Definition: krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition: krylov.f90:66
Defines a canonical Krylov preconditioner.
Definition: precon.f90:40