Neko  0.8.1
A portable framework for high-order spectral element flow simulations
gmres.f90
Go to the documentation of this file.
1 ! Copyright (c) 2020-2024, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34 module gmres
35  use krylov, only : ksp_t, ksp_monitor_t
36  use precon, only : pc_t
37  use ax_product, only : ax_t
38  use num_types, only: rp
39  use field, only : field_t
40  use coefs, only : coef_t
41  use gather_scatter, only : gs_t, gs_op_add
42  use bc, only : bc_list_t, bc_list_apply
43  use math, only : glsc3, rzero, rone, copy, sub2, cmult2, abscmp
44  use comm
45  implicit none
46  private
47 
49  type, public, extends(ksp_t) :: gmres_t
50  integer :: lgmres
51  real(kind=rp), allocatable :: w(:)
52  real(kind=rp), allocatable :: c(:)
53  real(kind=rp), allocatable :: r(:)
54  real(kind=rp), allocatable :: z(:,:)
55  real(kind=rp), allocatable :: h(:,:)
56  real(kind=rp), allocatable :: v(:,:)
57  real(kind=rp), allocatable :: s(:)
58  real(kind=rp), allocatable :: gam(:)
59  real(kind=rp), allocatable :: wk1(:)
60  contains
61  procedure, pass(this) :: init => gmres_init
62  procedure, pass(this) :: free => gmres_free
63  procedure, pass(this) :: solve => gmres_solve
64  end type gmres_t
65 
66 contains
67 
69  subroutine gmres_init(this, n, max_iter, M, lgmres, rel_tol, abs_tol)
70  class(gmres_t), intent(inout) :: this
71  integer, intent(in) :: n
72  integer, intent(in) :: max_iter
73  class(pc_t), optional, intent(inout), target :: M
74  integer, optional, intent(inout) :: lgmres
75  real(kind=rp), optional, intent(inout) :: rel_tol
76  real(kind=rp), optional, intent(inout) :: abs_tol
77 
78  if (present(lgmres)) then
79  this%lgmres = lgmres
80  else
81  this%lgmres = 30
82  end if
83 
84 
85  call this%free()
86 
87  if (present(m)) then
88  this%M => m
89  end if
90 
91  allocate(this%w(n))
92  allocate(this%r(n))
93  allocate(this%wk1(n))
94 
95  allocate(this%c(this%lgmres))
96  allocate(this%s(this%lgmres))
97  allocate(this%gam(this%lgmres + 1))
98 
99  allocate(this%z(n,this%lgmres))
100  allocate(this%v(n,this%lgmres))
101 
102  allocate(this%h(this%lgmres,this%lgmres))
103 
104 
105  if (present(rel_tol) .and. present(abs_tol)) then
106  call this%ksp_init(max_iter, rel_tol, abs_tol)
107  else if (present(rel_tol)) then
108  call this%ksp_init(max_iter, rel_tol=rel_tol)
109  else if (present(abs_tol)) then
110  call this%ksp_init(max_iter, abs_tol=abs_tol)
111  else
112  call this%ksp_init(max_iter)
113  end if
114 
115  end subroutine gmres_init
116 
118  subroutine gmres_free(this)
119  class(gmres_t), intent(inout) :: this
120 
121  call this%ksp_free()
122 
123  if (allocated(this%w)) then
124  deallocate(this%w)
125  end if
126 
127  if (allocated(this%c)) then
128  deallocate(this%c)
129  end if
130 
131  if (allocated(this%r)) then
132  deallocate(this%r)
133  end if
134 
135  if (allocated(this%z)) then
136  deallocate(this%z)
137  end if
138 
139  if (allocated(this%h)) then
140  deallocate(this%h)
141  end if
142 
143  if (allocated(this%v)) then
144  deallocate(this%v)
145  end if
146 
147  if (allocated(this%s)) then
148  deallocate(this%s)
149  end if
150 
151 
152  if (allocated(this%gam)) then
153  deallocate(this%gam)
154  end if
155 
156  if (allocated(this%wk1)) then
157  deallocate(this%wk1)
158  end if
159 
160  nullify(this%M)
161 
162  end subroutine gmres_free
163 
165  function gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
166  class(gmres_t), intent(inout) :: this
167  class(ax_t), intent(inout) :: ax
168  type(field_t), intent(inout) :: x
169  integer, intent(in) :: n
170  real(kind=rp), dimension(n), intent(inout) :: f
171  type(coef_t), intent(inout) :: coef
172  type(bc_list_t), intent(inout) :: blst
173  type(gs_t), intent(inout) :: gs_h
174  type(ksp_monitor_t) :: ksp_results
175  integer, optional, intent(in) :: niter
176  integer :: iter, max_iter
177  integer :: i, j, k, l, ierr
178  real(kind=rp) :: w_plus(neko_blk_size), x_plus(neko_blk_size)
179  real(kind=rp) :: rnorm, alpha, temp, lr, alpha2, norm_fac
180  logical :: conv
181 
182  conv = .false.
183  iter = 0
184 
185  if (present(niter)) then
186  max_iter = niter
187  else
188  max_iter = this%max_iter
189  end if
190 
191  associate(w => this%w, c => this%c, r => this%r, z => this%z, h => this%h, &
192  v => this%v, s => this%s, gam => this%gam, wk1 =>this%wk1)
193 
194  norm_fac = 1.0_rp / sqrt(coef%volume)
195  call rzero(x%x, n)
196  call rzero(gam, this%lgmres + 1)
197  call rone(s, this%lgmres)
198  call rone(c, this%lgmres)
199  call rzero(h, this%lgmres * this%lgmres)
200  do while (.not. conv .and. iter .lt. max_iter)
201 
202  if(iter.eq.0) then
203  call copy(r, f, n)
204  else
205  call copy(r, f, n)
206  call ax%compute(w, x%x, coef, x%msh, x%Xh)
207  call gs_h%op(w, n, gs_op_add)
208  call bc_list_apply(blst, w, n)
209  call sub2(r, w, n)
210  end if
211 
212  gam(1) = sqrt(glsc3(r, r, coef%mult, n))
213  if(iter.eq.0) then
214  ksp_results%res_start = gam(1) * norm_fac
215  end if
216 
217  if (abscmp(gam(1), 0.0_rp)) return
218 
219  rnorm = 0.0_rp
220  temp = 1.0_rp / gam(1)
221  call cmult2(v(1,1), r, temp, n)
222  do j = 1, this%lgmres
223  iter = iter+1
224 
225  call this%M%solve(z(1,j), v(1,j), n)
226 
227  call ax%compute(w, z(1,j), coef, x%msh, x%Xh)
228  call gs_h%op(w, n, gs_op_add)
229  call bc_list_apply(blst, w, n)
230 
231  do l = 1, j
232  h(l,j) = 0.0_rp
233  enddo
234 
235  do i = 0, n, neko_blk_size
236  if (i + neko_blk_size .le. n) then
237  do l = 1, j
238  do k = 1, neko_blk_size
239  h(l,j) = h(l,j) + &
240  w(i+k) * v(i+k,l) * coef%mult(i+k,1,1,1)
241  end do
242  end do
243  else
244  do k = 1, n-i
245  do l = 1, j
246  h(l,j) = h(l,j) + &
247  w(i+k) * v(i+k,l) * coef%mult(i+k,1,1,1)
248  end do
249  end do
250  end if
251  end do
252 
253  call mpi_allreduce(h(1,j), wk1, j, &
254  mpi_real_precision, mpi_sum, neko_comm, ierr)
255  call copy(h(1,j), wk1, j)
256 
257  alpha2 = 0.0_rp
258  do i = 0,n,neko_blk_size
259  if (i + neko_blk_size .le. n) then
260  do k = 1, neko_blk_size
261  w_plus(k) = 0.0_rp
262  end do
263  do l = 1,j
264  do k = 1, neko_blk_size
265  w_plus(k) = w_plus(k) - h(l,j) * v(i+k,l)
266  end do
267  end do
268  do k = 1, neko_blk_size
269  w(i+k) = w(i+k) + w_plus(k)
270  alpha2 = alpha2 + w(i+k)**2 * coef%mult(i+k,1,1,1)
271  end do
272  else
273  do k = 1, n-i
274  w_plus(1) = 0.0_rp
275  do l = 1, j
276  w_plus(1) = w_plus(1) - h(l,j) * v(i+k,l)
277  end do
278  w(i+k) = w(i+k) + w_plus(1)
279  alpha2 = alpha2 + (w(i+k)**2) * coef%mult(i+k,1,1,1)
280  end do
281  end if
282  end do
283 
284  call mpi_allreduce(alpha2, temp, 1, &
285  mpi_real_precision, mpi_sum, neko_comm, ierr)
286  alpha2 = temp
287  alpha = sqrt(alpha2)
288  do i=1,j-1
289  temp = h(i,j)
290  h(i ,j) = c(i)*temp + s(i) * h(i+1,j)
291  h(i+1,j) = -s(i)*temp + c(i) * h(i+1,j)
292  end do
293 
294  rnorm = 0.0_rp
295  if(abscmp(alpha, 0.0_rp)) then
296  conv = .true.
297  exit
298  end if
299 
300  lr = sqrt(h(j,j) * h(j,j) + alpha**2)
301  temp = 1.0_rp / lr
302  c(j) = h(j,j) * temp
303  s(j) = alpha * temp
304  h(j,j) = lr
305  gam(j+1) = -s(j) * gam(j)
306  gam(j) = c(j) * gam(j)
307 
308  rnorm = abs(gam(j+1)) * norm_fac
309  if (rnorm .lt. this%abs_tol) then
310  conv = .true.
311  exit
312  end if
313 
314  if (iter + 1 .gt. max_iter) exit
315 
316  if( j .lt. this%lgmres) then
317  temp = 1.0_rp / alpha
318  call cmult2(v(1,j+1), w, temp, n)
319  end if
320 
321  end do
322 
323  j = min(j, this%lgmres)
324  do k = j, 1, -1
325  temp = gam(k)
326  do i = j, k+1, -1
327  temp = temp - h(k,i) * c(i)
328  end do
329  c(k) = temp / h(k,k)
330  end do
331 
332  do i = 0, n, neko_blk_size
333  if (i + neko_blk_size .le. n) then
334  do k = 1, neko_blk_size
335  x_plus(k) = 0.0_rp
336  end do
337  do l = 1,j
338  do k = 1, neko_blk_size
339  x_plus(k) = x_plus(k) + c(l) * z(i+k,l)
340  end do
341  end do
342  do k = 1, neko_blk_size
343  x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k)
344  end do
345  else
346  do k = 1, n-i
347  x_plus(1) = 0.0_rp
348  do l = 1, j
349  x_plus(1) = x_plus(1) + c(l) * z(i+k,l)
350  end do
351  x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(1)
352  end do
353  end if
354  end do
355  end do
356 
357  end associate
358 
359  ksp_results%res_final = rnorm
360  ksp_results%iter = iter
361 
362  end function gmres_solve
363 
364 end module gmres
365 
366 
Defines a Matrix-vector product.
Definition: ax.f90:34
Defines a boundary condition.
Definition: bc.f90:34
Coefficients.
Definition: coef.f90:34
Definition: comm.F90:1
type(mpi_comm) neko_comm
MPI communicator.
Definition: comm.F90:16
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
Definition: comm.F90:22
Defines a field.
Definition: field.f90:34
Gather-scatter.
Defines various GMRES methods.
Definition: gmres.f90:34
subroutine gmres_free(this)
Deallocate a standard GMRES solver.
Definition: gmres.f90:119
subroutine gmres_init(this, n, max_iter, M, lgmres, rel_tol, abs_tol)
Initialise a standard GMRES solver.
Definition: gmres.f90:70
type(ksp_monitor_t) function gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter)
Standard GMRES solve.
Definition: gmres.f90:166
Implements the base abstract type for Krylov solvers plus helper types.
Definition: krylov.f90:34
Definition: math.f90:60
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
Definition: math.f90:617
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition: math.f90:810
subroutine, public rone(a, n)
Set all elements to one.
Definition: math.f90:200
subroutine, public copy(a, b, n)
Copy a vector .
Definition: math.f90:211
subroutine, public rzero(a, n)
Zero a real vector.
Definition: math.f90:167
subroutine, public sub2(a, b, n)
Vector substraction .
Definition: math.f90:545
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Krylov preconditioner.
Definition: precon.f90:34
Base type for a matrix-vector product providing .
Definition: ax.f90:43
A list of boundary conditions.
Definition: bc.f90:102
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition: coef.f90:54
Standard preconditioned generalized minimal residual method.
Definition: gmres.f90:49
Type for storing initial and final residuals in a Krylov solver.
Definition: krylov.f90:55
Base abstract type for a canonical Krylov method, solving .
Definition: krylov.f90:65
Defines a canonical Krylov preconditioner.
Definition: precon.f90:40