Neko  0.9.0
A portable framework for high-order spectral element flow simulations
cacg.f90
Go to the documentation of this file.
1 ! Copyright (c) 2021-2024, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34 module cacg
35  use num_types, only: rp
37  use precon, only : pc_t
38  use ax_product, only : ax_t
39  use field, only : field_t
40  use coefs, only : coef_t
41  use gather_scatter, only : gs_t, gs_op_add
43  use math, only : glsc3, rzero, copy, x_update, abscmp
44  use utils, only : neko_warning
45  use comm
46  use mxm_wrapper
47  implicit none
48  private
49 
51  type, public, extends(ksp_t) :: cacg_t
52  real(kind=rp), allocatable :: r(:)
53  real(kind=rp), allocatable :: p(:)
54  real(kind=rp), allocatable :: pr(:,:)
55  integer :: s
56  contains
57  procedure, pass(this) :: init => cacg_init
58  procedure, pass(this) :: free => cacg_free
59  procedure, pass(this) :: solve => cacg_solve
60  procedure, pass(this) :: solve_coupled => cacg_solve_coupled
61  end type cacg_t
62 
63 contains
64 
66  subroutine cacg_init(this, n, max_iter, M, s, rel_tol, abs_tol, monitor)
67  class(cacg_t), intent(inout) :: this
68  class(pc_t), optional, intent(inout), target :: M
69  integer, intent(in) :: n
70  integer, intent(in) :: max_iter
71  real(kind=rp), optional, intent(inout) :: rel_tol
72  real(kind=rp), optional, intent(inout) :: abs_tol
73  logical, optional, intent(in) :: monitor
74  integer, optional, intent(inout) :: s
75  call this%free()
76 
77  if (present(s)) then
78  this%s = s
79  else
80  this%s = 4
81  end if
82  if (pe_rank .eq. 0) then
83  call neko_warning("Communication Avoiding CG chosen,&
84  & be aware of potential instabilities")
85  end if
86 
87  allocate(this%r(n))
88  allocate(this%p(n))
89  allocate(this%PR(n,4*this%s+1))
90  if (present(m)) then
91  this%M => m
92  end if
93 
94  if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
95  call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
96  else if (present(rel_tol) .and. present(abs_tol)) then
97  call this%ksp_init(max_iter, rel_tol, abs_tol)
98  else if (present(monitor) .and. present(abs_tol)) then
99  call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
100  else if (present(rel_tol) .and. present(monitor)) then
101  call this%ksp_init(max_iter, rel_tol, monitor = monitor)
102  else if (present(rel_tol)) then
103  call this%ksp_init(max_iter, rel_tol = rel_tol)
104  else if (present(abs_tol)) then
105  call this%ksp_init(max_iter, abs_tol = abs_tol)
106  else if (present(monitor)) then
107  call this%ksp_init(max_iter, monitor = monitor)
108  else
109  call this%ksp_init(max_iter)
110  end if
111 
112  end subroutine cacg_init
113 
115  subroutine cacg_free(this)
116  class(cacg_t), intent(inout) :: this
117 
118  call this%ksp_free()
119 
120  if (allocated(this%PR)) then
121  deallocate(this%PR)
122  end if
123 
124  if (allocated(this%r)) then
125  deallocate(this%r)
126  end if
127 
128  if (allocated(this%p)) then
129  deallocate(this%p)
130  end if
131 
132  nullify(this%M)
133 
134 
135  end subroutine cacg_free
136 
138  function cacg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
139  class(cacg_t), intent(inout) :: this
140  class(ax_t), intent(inout) :: ax
141  type(field_t), intent(inout) :: x
142  integer, intent(in) :: n
143  real(kind=rp), dimension(n), intent(inout) :: f
144  type(coef_t), intent(inout) :: coef
145  type(bc_list_t), intent(inout) :: blst
146  type(gs_t), intent(inout) :: gs_h
147  type(ksp_monitor_t) :: ksp_results
148  integer, optional, intent(in) :: niter
149  integer :: i, j, k, l, iter, max_iter, s, ierr, it
150  real(kind=rp) :: rnorm, rtr, rtz1, tmp
151  real(kind=rp) :: beta(this%s+1), alpha(this%s+1), alpha1, alpha2, norm_fac
152  real(kind=rp), dimension(4*this%s+1,4*this%s+1) :: tt, g, gtt, temp, temp2
153  real(kind=rp) :: p_c(4*this%s+1,this%s+1)
154  real(kind=rp) :: r_c(4*this%s+1,this%s+1)
155  real(kind=rp) :: z_c(4*this%s+1,this%s+1)
156  real(kind=rp) :: x_c(4*this%s+1,this%s+1)
157 
158  associate(pr => this%PR, r => this%r, p => this%p)
159  s = this%s
160  if (present(niter)) then
161  max_iter = niter
162  else
163  max_iter = this%max_iter
164  end if
165  norm_fac = 1.0_rp / sqrt(coef%volume)
166 
167  rtz1 = 1.0_rp
168  call rzero(x%x, n)
169  call copy(r, f, n)
170  call this%M%solve(p, r, n)
171 
172  rtr = glsc3(r, coef%mult, r, n)
173  rnorm = sqrt(rtr)*norm_fac
174  ksp_results%res_start = rnorm
175  ksp_results%res_final = rnorm
176  ksp_results%iter = 0
177  iter = 0
178  if(abscmp(rnorm, 0.0_rp)) return
179  call this%monitor_start('CACG')
180  do while (iter < max_iter)
181 
182  call copy(pr,p, n)
183  call copy(pr(1,2*s+2), r, n)
184 
185  !Here we have hardcoded a monomial basis atm.
186  do i = 2, 2*s + 1
187  if (mod(i,2) .eq. 0) then
188  call ax%compute(pr(1,i), pr(1,i-1), coef, x%msh, x%Xh)
189  call gs_h%gs_op_vector(pr(1,i), n, gs_op_add)
190  call bc_list_apply_scalar(blst, pr(1,i), n)
191  else
192  call this%M%solve(pr(1,i), pr(1,i-1), n)
193  end if
194  end do
195 
196  do i = 2*s+2, 4*s
197  if (mod(i,2) == 0) then
198  call this%M%solve(pr(1,i+1), pr(1,i), n)
199  else
200  call ax%compute(pr(1,i+1), pr(1,i), coef, x%msh, x%Xh)
201  call gs_h%gs_op_vector(pr(1,i+1), n, gs_op_add)
202  call bc_list_apply_scalar(blst, pr(1,1+i), n)
203  end if
204  end do
205 
206  call construct_basis_matrix(tt, s)
207  call rzero(p_c, (4*s+1) * (s+1))
208  p_c(1,1) = 1.0_rp
209  call rzero(r_c, (4*s+1) * (s+1))
210  r_c(2*s+2,1) = 1.0_rp
211  call mxm(tt, 4*s+1, r_c, 4*s+1, z_c,s+1)
212  call rzero(x_c, (4*s+1) * (s+1))
213  call rzero(temp, (4*s+1)**2)
214 
215  do i = 0, n, neko_blk_size
216  it = 0
217  if (i + neko_blk_size .le. n) then
218  do j = 1, 4*s+1
219  do l = 1, j
220  it = it + 1
221  do k = 1, neko_blk_size
222  temp(it,1) = temp(it,1) &
223  + pr(i+k,j) * pr(i+k,l) * coef%mult(i+k,1,1,1)
224  end do
225  end do
226  end do
227  else
228  do j = 1, 4*s+1
229  do l = 1, j
230  it = it + 1
231  do k = 1, n-i
232  temp(it,1) = temp(it,1) &
233  + pr(i+k,j) * pr(i+k,l) * coef%mult(i+k,1,1,1)
234  end do
235  end do
236  end do
237  end if
238  end do
239 
240  call mpi_allreduce(temp, temp2, it, &
241  mpi_real_precision, mpi_sum, neko_comm, ierr)
242  it = 0
243  do j = 1, 4*s+1
244  do k = 1, j
245  it = it + 1
246  g(j,k) = temp2(it,1)
247  g(k,j) = temp2(it,1)
248  end do
249  end do
250 
251  call mxm(g,4*s+1, tt, 4*s+1,gtt,4*s+1)
252 
253  do j = 1, s
254  iter = iter + 1
255 
256  call mxm(g, 4*s+1, r_c(1,j), 4*s+1,temp, 1)
257  call mxm(gtt, 4*s+1, p_c(1,j), 4*s+1,temp2, 1)
258  alpha1 = 0.0_rp
259  alpha2 = 0.0_rp
260  do i = 1,4*s+1
261  alpha1 = alpha1 + temp(i,1) * z_c(i,j)
262  alpha2 = alpha2 + temp2(i,1) * p_c(i,j)
263  end do
264  alpha(j) = alpha1/alpha2
265 
266  do i = 1, 4*s+1
267  x_c(i,j+1) = x_c(i,j) + alpha(j) * p_c(i,j)
268  tmp = 0.0_rp
269  do k = 1, 4*s+1
270  tmp = tmp + tt(i,k) * p_c(k,j)
271  end do
272  r_c(i,j+1) = r_c(i,j) - alpha(j)*tmp
273  tmp = 0.0_rp
274  do k = 1, 4*s+1
275  tmp = tmp + tt(i,k)*r_c(k,j+1)
276  end do
277  z_c(i,j+1) = tmp
278  end do
279 
280  call mxm(g,4*s+1,r_c(1,j+1),4*s+1,temp2,1)
281  alpha2 = 0.0_rp
282  do i = 1,4*s+1
283  alpha2 = alpha2 + temp2(i,1)*z_c(i,j+1)
284  end do
285  beta(j) = alpha2 / alpha1
286  do i = 1,4*s+1
287  p_c(i,j+1) = z_c(i,j+1) + beta(j)*p_c(i,j)
288  end do
289  end do
290 
291  call rzero(p, n)
292  call rzero(r, n)
293  rtr = 0.0_rp
294  do i = 0, n, neko_blk_size
295  if (i + neko_blk_size .le. n) then
296  do j = 1, 4*s + 1
297  do k = 1, neko_blk_size
298  x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + pr(i+k,j) * x_c(j,s+1)
299  p(i+k) = p(i+k) + pr(i+k,j) * p_c(j,s+1)
300  tmp = pr(i+k,j) * r_c(j,s+1)
301  r(i+k) = r(i+k) + tmp
302  end do
303  end do
304  do k = 1, neko_blk_size
305  rtr = rtr + r(i+k)**2 * coef%mult(i+k,1,1,1)
306  end do
307  else
308  do j = 1,4*s+1
309  do k = 1, n-i
310  x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + pr(i+k,j) * x_c(j,s+1)
311  p(i+k) = p(i+k) + pr(i+k,j) * p_c(j,s+1)
312  tmp = pr(i+k,j) * r_c(j,s+1)
313  r(i+k) = r(i+k) + tmp
314  end do
315  end do
316  do k = 1, n-i
317  rtr = rtr + r(i+k)**2 * coef%mult(i+k,1,1,1)
318  end do
319  end if
320  end do
321 
322  call mpi_allreduce(rtr, tmp, 1, &
323  mpi_real_precision, mpi_sum, neko_comm, ierr)
324  rnorm = norm_fac*sqrt(tmp)
325  call this%monitor_iter(iter, rnorm)
326  if( rnorm <= this%abs_tol) exit
327  end do
328  call this%monitor_stop()
329  ksp_results%res_final = rnorm
330  ksp_results%iter = iter
331 
332  end associate
333 
334  end function cacg_solve
335 
337  subroutine construct_basis_matrix(Tt, s)
338  integer, intent(in) :: s
339  real(kind=rp), intent(inout) :: tt(4*s+1,4*s+1)
340  integer :: mlen, i
341  mlen = (4*s+1)*(4*s+1)
342  call rzero(tt,mlen)
343  do i = 1, 2*s
344  tt(i+1,i) = 1.0_rp
345  end do
346  do i = 1, (2*s-1)
347  tt(2*s+2+i,2*s+1+i) = 1.0_rp
348  end do
349  end subroutine construct_basis_matrix
350 
352  function cacg_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
353  n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
354  class(cacg_t), intent(inout) :: this
355  class(ax_t), intent(inout) :: ax
356  type(field_t), intent(inout) :: x
357  type(field_t), intent(inout) :: y
358  type(field_t), intent(inout) :: z
359  integer, intent(in) :: n
360  real(kind=rp), dimension(n), intent(inout) :: fx
361  real(kind=rp), dimension(n), intent(inout) :: fy
362  real(kind=rp), dimension(n), intent(inout) :: fz
363  type(coef_t), intent(inout) :: coef
364  type(bc_list_t), intent(inout) :: blstx
365  type(bc_list_t), intent(inout) :: blsty
366  type(bc_list_t), intent(inout) :: blstz
367  type(gs_t), intent(inout) :: gs_h
368  type(ksp_monitor_t), dimension(3) :: ksp_results
369  integer, optional, intent(in) :: niter
370 
371  ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
372  ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
373  ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
374 
375  end function cacg_solve_coupled
376 
377 end module cacg
378 
379 
Defines a Matrix-vector product.
Definition: ax.f90:34
Defines a boundary condition.
Definition: bc.f90:34
subroutine, public bc_list_apply_scalar(bclst, x, n, t, tstep)
Apply a list of boundary conditions to a scalar field.
Definition: bc.f90:530
Defines a communication avoiding Conjugate Gradient method.
Definition: cacg.f90:34
type(ksp_monitor_t) function, dimension(3) cacg_solve_coupled(this, Ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
S-step CA PCG coupled solve.
Definition: cacg.f90:354
subroutine construct_basis_matrix(Tt, s)
Monomial matrix constuction, not sparse.
Definition: cacg.f90:338
subroutine cacg_init(this, n, max_iter, M, s, rel_tol, abs_tol, monitor)
Initialise a s-step CA PCG solver.
Definition: cacg.f90:67
type(ksp_monitor_t) function cacg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter)
S-step CA PCG solve.
Definition: cacg.f90:139
subroutine cacg_free(this)
Deallocate a s-step CA PCG solver.
Definition: cacg.f90:116
Coefficients.
Definition: coef.f90:34
Definition: comm.F90:1
integer pe_rank
MPI rank.
Definition: comm.F90:28
type(mpi_comm) neko_comm
MPI communicator.
Definition: comm.F90:16
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
Definition: comm.F90:23
Defines a field.
Definition: field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition: krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition: krylov.f90:51
Definition: math.f90:60
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition: math.f90:895
subroutine, public x_update(a, b, c, c1, c2, n)
Returns .
Definition: math.f90:861
subroutine, public copy(a, b, n)
Copy a vector .
Definition: math.f90:239
subroutine, public rzero(a, n)
Zero a real vector.
Definition: math.f90:195
Wrapper for all matrix-matrix product implementations.
Definition: mxm_wrapper.F90:2
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Definition: mxm_wrapper.F90:29
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Krylov preconditioner.
Definition: precon.f90:34
Utilities.
Definition: utils.f90:35
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Definition: utils.f90:245
Base type for a matrix-vector product providing .
Definition: ax.f90:43
A list of boundary conditions.
Definition: bc.f90:104
S-step communication avoiding preconditioned conjugate gradient method.
Definition: cacg.f90:51
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition: coef.f90:55
Type for storing initial and final residuals in a Krylov solver.
Definition: krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition: krylov.f90:66
Defines a canonical Krylov preconditioner.
Definition: precon.f90:40