Neko  0.8.1
A portable framework for high-order spectral element flow simulations
cacg.f90
Go to the documentation of this file.
1 ! Copyright (c) 2021-2024, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34 module cacg
35  use num_types, only: rp
37  use precon, only : pc_t
38  use ax_product, only : ax_t
39  use field, only : field_t
40  use coefs, only : coef_t
41  use gather_scatter, only : gs_t, gs_op_add
43  use math, only : glsc3, rzero, copy, x_update, abscmp
44  use utils, only : neko_warning
45  use comm
46  use mxm_wrapper
47  implicit none
48  private
49 
51  type, public, extends(ksp_t) :: cacg_t
52  real(kind=rp), allocatable :: r(:)
53  real(kind=rp), allocatable :: p(:)
54  real(kind=rp), allocatable :: pr(:,:)
55  integer :: s
56  contains
57  procedure, pass(this) :: init => cacg_init
58  procedure, pass(this) :: free => cacg_free
59  procedure, pass(this) :: solve => cacg_solve
60  end type cacg_t
61 
62 contains
63 
65  subroutine cacg_init(this, n, max_iter, M, s, rel_tol, abs_tol)
66  class(cacg_t), intent(inout) :: this
67  class(pc_t), optional, intent(inout), target :: M
68  integer, intent(in) :: n
69  integer, intent(in) :: max_iter
70  real(kind=rp), optional, intent(inout) :: rel_tol
71  real(kind=rp), optional, intent(inout) :: abs_tol
72  integer, optional, intent(inout) :: s
73  call this%free()
74 
75  if (present(s)) then
76  this%s = s
77  else
78  this%s = 4
79  end if
80  if (pe_rank .eq. 0) then
81  call neko_warning("Communication Avoiding CG chosen, be aware of potential instabilities")
82  end if
83 
84  allocate(this%r(n))
85  allocate(this%p(n))
86  allocate(this%PR(n,4*this%s+1))
87  if (present(m)) then
88  this%M => m
89  end if
90 
91  if (present(rel_tol) .and. present(abs_tol)) then
92  call this%ksp_init(max_iter, rel_tol, abs_tol)
93  else if (present(rel_tol)) then
94  call this%ksp_init(max_iter, rel_tol=rel_tol)
95  else if (present(abs_tol)) then
96  call this%ksp_init(max_iter, abs_tol=abs_tol)
97  else
98  call this%ksp_init(max_iter)
99  end if
100 
101  end subroutine cacg_init
102 
104  subroutine cacg_free(this)
105  class(cacg_t), intent(inout) :: this
106 
107  call this%ksp_free()
108 
109  if (allocated(this%PR)) then
110  deallocate(this%PR)
111  end if
112 
113  if (allocated(this%r)) then
114  deallocate(this%r)
115  end if
116 
117  if (allocated(this%p)) then
118  deallocate(this%p)
119  end if
120 
121  nullify(this%M)
122 
123 
124  end subroutine cacg_free
125 
127  function cacg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
128  class(cacg_t), intent(inout) :: this
129  class(ax_t), intent(inout) :: ax
130  type(field_t), intent(inout) :: x
131  integer, intent(in) :: n
132  real(kind=rp), dimension(n), intent(inout) :: f
133  type(coef_t), intent(inout) :: coef
134  type(bc_list_t), intent(inout) :: blst
135  type(gs_t), intent(inout) :: gs_h
136  type(ksp_monitor_t) :: ksp_results
137  integer, optional, intent(in) :: niter
138  integer :: i, j, k, l, iter, max_iter, s, ierr, it
139  real(kind=rp) :: rnorm, rtr, rtz1, tmp
140  real(kind=rp) :: beta(this%s+1), alpha(this%s+1), alpha1, alpha2, norm_fac
141  real(kind=rp), dimension(4*this%s+1,4*this%s+1) :: tt, g, gtt, temp, temp2
142  real(kind=rp) :: p_c(4*this%s+1,this%s+1)
143  real(kind=rp) :: r_c(4*this%s+1,this%s+1)
144  real(kind=rp) :: z_c(4*this%s+1,this%s+1)
145  real(kind=rp) :: x_c(4*this%s+1,this%s+1)
146 
147  associate(pr => this%PR, r => this%r, p => this%p)
148  s = this%s
149  if (present(niter)) then
150  max_iter = niter
151  else
152  max_iter = this%max_iter
153  end if
154  norm_fac = 1.0_rp / sqrt(coef%volume)
155 
156  rtz1 = 1.0_rp
157  call rzero(x%x, n)
158  call copy(r, f, n)
159  call this%M%solve(p, r, n)
160 
161  rtr = glsc3(r, coef%mult, r, n)
162  rnorm = sqrt(rtr)*norm_fac
163  ksp_results%res_start = rnorm
164  ksp_results%res_final = rnorm
165  ksp_results%iter = 0
166  iter = 0
167  if(abscmp(rnorm, 0.0_rp)) return
168  do while (iter < max_iter)
169 
170  call copy(pr,p, n)
171  call copy(pr(1,2*s+2), r, n)
172 
173  !Here we have hardcoded a monomial basis atm.
174  do i = 2, 2*s + 1
175  if (mod(i,2) .eq. 0) then
176  call ax%compute(pr(1,i), pr(1,i-1), coef, x%msh, x%Xh)
177  call gs_h%gs_op_vector(pr(1,i), n, gs_op_add)
178  call bc_list_apply_scalar(blst, pr(1,i), n)
179  else
180  call this%M%solve(pr(1,i), pr(1,i-1), n)
181  end if
182  end do
183 
184  do i = 2*s+2, 4*s
185  if (mod(i,2) == 0) then
186  call this%M%solve(pr(1,i+1), pr(1,i), n)
187  else
188  call ax%compute(pr(1,i+1), pr(1,i), coef, x%msh, x%Xh)
189  call gs_h%gs_op_vector(pr(1,i+1), n, gs_op_add)
190  call bc_list_apply_scalar(blst, pr(1,1+i), n)
191  end if
192  end do
193 
194  call construct_basis_matrix(tt, s)
195  call rzero(p_c, (4*s+1) * (s+1))
196  p_c(1,1) = 1.0_rp
197  call rzero(r_c, (4*s+1) * (s+1))
198  r_c(2*s+2,1) = 1.0_rp
199  call mxm(tt, 4*s+1, r_c, 4*s+1, z_c,s+1)
200  call rzero(x_c, (4*s+1) * (s+1))
201  call rzero(temp, (4*s+1)**2)
202 
203  do i = 0, n, neko_blk_size
204  it = 0
205  if (i + neko_blk_size .le. n) then
206  do j = 1, 4*s+1
207  do l = 1, j
208  it = it + 1
209  do k = 1, neko_blk_size
210  temp(it,1) = temp(it,1) &
211  + pr(i+k,j) * pr(i+k,l) * coef%mult(i+k,1,1,1)
212  end do
213  end do
214  end do
215  else
216  do j = 1, 4*s+1
217  do l = 1, j
218  it = it + 1
219  do k = 1, n-i
220  temp(it,1) = temp(it,1) &
221  + pr(i+k,j) * pr(i+k,l) * coef%mult(i+k,1,1,1)
222  end do
223  end do
224  end do
225  end if
226  end do
227 
228  call mpi_allreduce(temp, temp2, it, &
229  mpi_real_precision, mpi_sum, neko_comm, ierr)
230  it = 0
231  do j = 1, 4*s+1
232  do k = 1, j
233  it = it + 1
234  g(j,k) = temp2(it,1)
235  g(k,j) = temp2(it,1)
236  end do
237  end do
238 
239  call mxm(g,4*s+1, tt, 4*s+1,gtt,4*s+1)
240 
241  do j = 1, s
242  iter = iter + 1
243 
244  call mxm(g, 4*s+1, r_c(1,j), 4*s+1,temp, 1)
245  call mxm(gtt, 4*s+1, p_c(1,j), 4*s+1,temp2, 1)
246  alpha1 = 0.0_rp
247  alpha2 = 0.0_rp
248  do i = 1,4*s+1
249  alpha1 = alpha1 + temp(i,1) * z_c(i,j)
250  alpha2 = alpha2 + temp2(i,1) * p_c(i,j)
251  end do
252  alpha(j) = alpha1/alpha2
253 
254  do i = 1, 4*s+1
255  x_c(i,j+1) = x_c(i,j) + alpha(j) * p_c(i,j)
256  tmp = 0.0_rp
257  do k = 1, 4*s+1
258  tmp = tmp + tt(i,k) * p_c(k,j)
259  end do
260  r_c(i,j+1) = r_c(i,j) - alpha(j)*tmp
261  tmp = 0.0_rp
262  do k = 1, 4*s+1
263  tmp = tmp + tt(i,k)*r_c(k,j+1)
264  end do
265  z_c(i,j+1) = tmp
266  end do
267 
268  call mxm(g,4*s+1,r_c(1,j+1),4*s+1,temp2,1)
269  alpha2 = 0.0_rp
270  do i = 1,4*s+1
271  alpha2 = alpha2 + temp2(i,1)*z_c(i,j+1)
272  end do
273  beta(j) = alpha2 / alpha1
274  do i = 1,4*s+1
275  p_c(i,j+1) = z_c(i,j+1) + beta(j)*p_c(i,j)
276  end do
277  end do
278 
279  call rzero(p, n)
280  call rzero(r, n)
281  rtr = 0.0_rp
282  do i = 0, n, neko_blk_size
283  if (i + neko_blk_size .le. n) then
284  do j = 1, 4*s + 1
285  do k = 1, neko_blk_size
286  x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + pr(i+k,j) * x_c(j,s+1)
287  p(i+k) = p(i+k) + pr(i+k,j) * p_c(j,s+1)
288  tmp = pr(i+k,j) * r_c(j,s+1)
289  r(i+k) = r(i+k) + tmp
290  end do
291  end do
292  do k = 1, neko_blk_size
293  rtr = rtr + r(i+k)**2 * coef%mult(i+k,1,1,1)
294  end do
295  else
296  do j = 1,4*s+1
297  do k = 1, n-i
298  x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + pr(i+k,j) * x_c(j,s+1)
299  p(i+k) = p(i+k) + pr(i+k,j) * p_c(j,s+1)
300  tmp = pr(i+k,j) * r_c(j,s+1)
301  r(i+k) = r(i+k) + tmp
302  end do
303  end do
304  do k = 1, n-i
305  rtr = rtr + r(i+k)**2 * coef%mult(i+k,1,1,1)
306  end do
307  end if
308  end do
309 
310  call mpi_allreduce(rtr, tmp, 1, &
311  mpi_real_precision, mpi_sum, neko_comm, ierr)
312  rnorm = norm_fac*sqrt(tmp)
313  if( rnorm <= this%abs_tol) exit
314  end do
315 
316  ksp_results%res_final = rnorm
317  ksp_results%iter = iter
318 
319  end associate
320 
321  end function cacg_solve
322 
324  subroutine construct_basis_matrix(Tt, s)
325  integer, intent(in) :: s
326  real(kind=rp), intent(inout) :: tt(4*s+1,4*s+1)
327  integer :: mlen, i
328  mlen = (4*s+1)*(4*s+1)
329  call rzero(tt,mlen)
330  do i = 1, 2*s
331  tt(i+1,i) = 1.0_rp
332  end do
333  do i = 1, (2*s-1)
334  tt(2*s+2+i,2*s+1+i) = 1.0_rp
335  end do
336  end subroutine construct_basis_matrix
337 
338 end module cacg
339 
340 
Defines a Matrix-vector product.
Definition: ax.f90:34
Defines a boundary condition.
Definition: bc.f90:34
subroutine, public bc_list_apply_scalar(bclst, x, n, t, tstep)
Apply a list of boundary conditions to a scalar field.
Definition: bc.f90:515
Defines a communication avoiding Conjugate Gradient method.
Definition: cacg.f90:34
subroutine cacg_init(this, n, max_iter, M, s, rel_tol, abs_tol)
Initialise a s-step CA PCG solver.
Definition: cacg.f90:66
subroutine construct_basis_matrix(Tt, s)
Monomial matrix constuction, not sparse.
Definition: cacg.f90:325
type(ksp_monitor_t) function cacg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter)
S-step CA PCG solve.
Definition: cacg.f90:128
subroutine cacg_free(this)
Deallocate a s-step CA PCG solver.
Definition: cacg.f90:105
Coefficients.
Definition: coef.f90:34
Definition: comm.F90:1
integer pe_rank
MPI rank.
Definition: comm.F90:26
type(mpi_comm) neko_comm
MPI communicator.
Definition: comm.F90:16
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
Definition: comm.F90:22
Defines a field.
Definition: field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition: krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition: krylov.f90:50
Definition: math.f90:60
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition: math.f90:810
subroutine, public x_update(a, b, c, c1, c2, n)
Returns .
Definition: math.f90:777
subroutine, public copy(a, b, n)
Copy a vector .
Definition: math.f90:211
subroutine, public rzero(a, n)
Zero a real vector.
Definition: math.f90:167
Wrapper for all matrix-matrix product implementations.
Definition: mxm_wrapper.F90:2
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Definition: mxm_wrapper.F90:29
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Krylov preconditioner.
Definition: precon.f90:34
Utilities.
Definition: utils.f90:35
subroutine neko_warning(warning_msg)
Definition: utils.f90:191
Base type for a matrix-vector product providing .
Definition: ax.f90:43
A list of boundary conditions.
Definition: bc.f90:102
S-step communication avoiding preconditioned conjugate gradient method.
Definition: cacg.f90:51
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition: coef.f90:54
Type for storing initial and final residuals in a Krylov solver.
Definition: krylov.f90:55
Base abstract type for a canonical Krylov method, solving .
Definition: krylov.f90:65
Defines a canonical Krylov preconditioner.
Definition: precon.f90:40