Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
gmres.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module gmres
35 use krylov, only : ksp_t, ksp_monitor_t
36 use precon, only : pc_t
37 use ax_product, only : ax_t
38 use num_types, only: rp, xp
39 use field, only : field_t
40 use coefs, only : coef_t
41 use gather_scatter, only : gs_t, gs_op_add
42 use bc_list, only : bc_list_t
43 use math, only : glsc3, rzero, rone, copy, sub2, cmult2, abscmp
44 use neko_config, only : neko_blk_size
46 use mpi_f08, only : mpi_allreduce, mpi_in_place, mpi_sum
47 implicit none
48 private
49
51 type, public, extends(ksp_t) :: gmres_t
52 integer :: lgmres = 30
53 real(kind=rp), allocatable :: w(:)
54 real(kind=rp), allocatable :: r(:)
55 real(kind=rp), allocatable :: z(:,:)
56 real(kind=rp), allocatable :: v(:,:)
58 real(kind=xp), allocatable :: h(:,:)
59 real(kind=xp), allocatable :: s(:)
60 real(kind=xp), allocatable :: gam(:)
61 real(kind=xp), allocatable :: c(:)
62 contains
63 procedure, pass(this) :: init => gmres_init
64 procedure, pass(this) :: free => gmres_free
65 procedure, pass(this) :: solve => gmres_solve
66 procedure, pass(this) :: solve_coupled => gmres_solve_coupled
67 end type gmres_t
68
69contains
70
72 subroutine gmres_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
73 class(gmres_t), target, intent(inout) :: this
74 integer, intent(in) :: n
75 integer, intent(in) :: max_iter
76 class(pc_t), optional, intent(in), target :: M
77 real(kind=rp), optional, intent(in) :: rel_tol
78 real(kind=rp), optional, intent(in) :: abs_tol
79 logical, optional, intent(in) :: monitor
80
81 call this%free()
82
83 if (present(m)) then
84 this%M => m
85 end if
86
87 allocate(this%w(n))
88 allocate(this%r(n))
89
90 allocate(this%c(this%lgmres))
91 allocate(this%s(this%lgmres))
92 allocate(this%gam(this%lgmres + 1))
93
94 allocate(this%z(n, this%lgmres))
95 allocate(this%v(n, this%lgmres))
96
97 allocate(this%h(this%lgmres, this%lgmres))
98
99 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
100 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
101 else if (present(rel_tol) .and. present(abs_tol)) then
102 call this%ksp_init(max_iter, rel_tol, abs_tol)
103 else if (present(monitor) .and. present(abs_tol)) then
104 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
105 else if (present(rel_tol) .and. present(monitor)) then
106 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
107 else if (present(rel_tol)) then
108 call this%ksp_init(max_iter, rel_tol = rel_tol)
109 else if (present(abs_tol)) then
110 call this%ksp_init(max_iter, abs_tol = abs_tol)
111 else if (present(monitor)) then
112 call this%ksp_init(max_iter, monitor = monitor)
113 else
114 call this%ksp_init(max_iter)
115 end if
116
117 end subroutine gmres_init
118
120 subroutine gmres_free(this)
121 class(gmres_t), intent(inout) :: this
122
123 call this%ksp_free()
124
125 if (allocated(this%w)) then
126 deallocate(this%w)
127 end if
128
129 if (allocated(this%c)) then
130 deallocate(this%c)
131 end if
132
133 if (allocated(this%r)) then
134 deallocate(this%r)
135 end if
136
137 if (allocated(this%z)) then
138 deallocate(this%z)
139 end if
140
141 if (allocated(this%h)) then
142 deallocate(this%h)
143 end if
144
145 if (allocated(this%v)) then
146 deallocate(this%v)
147 end if
148
149 if (allocated(this%s)) then
150 deallocate(this%s)
151 end if
152
153
154 if (allocated(this%gam)) then
155 deallocate(this%gam)
156 end if
157
158 nullify(this%M)
159
160 end subroutine gmres_free
161
163 function gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) &
164 result(ksp_results)
165 class(gmres_t), intent(inout) :: this
166 class(ax_t), intent(in) :: ax
167 type(field_t), intent(inout) :: x
168 integer, intent(in) :: n
169 real(kind=rp), dimension(n), intent(in) :: f
170 type(coef_t), intent(inout) :: coef
171 type(bc_list_t), intent(inout) :: blst
172 type(gs_t), intent(inout) :: gs_h
173 type(ksp_monitor_t) :: ksp_results
174 integer, optional, intent(in) :: niter
175 integer :: iter, max_iter
176 integer :: i, j, k, l, ierr
177 real(kind=xp) :: w_plus(neko_blk_size), x_plus(neko_blk_size)
178 real(kind=xp) :: alpha, lr, alpha2, norm_fac
179 real(kind=rp) :: temp, rnorm
180 logical :: conv
181
182 conv = .false.
183 iter = 0
184 rnorm = 0.0_rp
185
186 if (present(niter)) then
187 max_iter = niter
188 else
189 max_iter = this%max_iter
190 end if
191
192 associate(w => this%w, c => this%c, r => this%r, z => this%z, h => this%h, &
193 v => this%v, s => this%s, gam => this%gam)
194
195 norm_fac = 1.0_rp / sqrt(coef%volume)
196 call rzero(x%x, n)
197 gam = 0.0_xp
198 s = 1.0_xp
199 c = 1.0_xp
200 h = 0.0_xp
201 call this%monitor_start('GMRES')
202 do while (.not. conv .and. iter .lt. max_iter)
203
204 if (iter .eq. 0) then
205 call copy(r, f, n)
206 else
207 call copy(r, f, n)
208 call ax%compute(w, x%x, coef, x%msh, x%Xh)
209 call gs_h%op(w, n, gs_op_add)
210 call blst%apply(w, n)
211 call sub2(r, w, n)
212 end if
213
214 gam(1) = sqrt(glsc3(r, r, coef%mult, n))
215 if (iter .eq. 0) then
216 ksp_results%res_start = gam(1) * norm_fac
217 end if
218
219 if (abscmp(gam(1), 0.0_xp)) exit
220
221 rnorm = 0.0_rp
222 temp = 1.0_rp / gam(1)
223 call cmult2(v(1,1), r, temp, n)
224 do j = 1, this%lgmres
225 iter = iter+1
226
227 call this%M%solve(z(1,j), v(1,j), n)
228
229 call ax%compute(w, z(1,j), coef, x%msh, x%Xh)
230 call gs_h%op(w, n, gs_op_add)
231 call blst%apply(w, n)
232
233 do l = 1, j
234 h(l,j) = 0.0_rp
235 end do
236
237 do i = 0, n, neko_blk_size
238 if (i + neko_blk_size .le. n) then
239 do l = 1, j
240 do k = 1, neko_blk_size
241 h(l,j) = h(l,j) + &
242 w(i+k) * v(i+k,l) * coef%mult(i+k,1,1,1)
243 end do
244 end do
245 else
246 do k = 1, n-i
247 do l = 1, j
248 h(l,j) = h(l,j) + &
249 w(i+k) * v(i+k,l) * coef%mult(i+k,1,1,1)
250 end do
251 end do
252 end if
253 end do
254
255 call mpi_allreduce(mpi_in_place, h(1,j), j, &
256 mpi_extra_precision, mpi_sum, neko_comm, ierr)
257
258 alpha2 = 0.0_rp
259 do i = 0, n, neko_blk_size
260 if (i + neko_blk_size .le. n) then
261 do k = 1, neko_blk_size
262 w_plus(k) = 0.0_rp
263 end do
264 do l = 1,j
265 do k = 1, neko_blk_size
266 w_plus(k) = w_plus(k) - h(l,j) * v(i+k,l)
267 end do
268 end do
269 do k = 1, neko_blk_size
270 w(i+k) = w(i+k) + w_plus(k)
271 alpha2 = alpha2 + w(i+k)**2 * coef%mult(i+k,1,1,1)
272 end do
273 else
274 do k = 1, n-i
275 w_plus(1) = 0.0_rp
276 do l = 1, j
277 w_plus(1) = w_plus(1) - h(l,j) * v(i+k,l)
278 end do
279 w(i+k) = w(i+k) + w_plus(1)
280 alpha2 = alpha2 + (w(i+k)**2) * coef%mult(i+k,1,1,1)
281 end do
282 end if
283 end do
284
285 call mpi_allreduce(mpi_in_place,alpha2, 1, &
286 mpi_extra_precision, mpi_sum, neko_comm, ierr)
287 alpha = sqrt(alpha2)
288 do i = 1, j-1
289 temp = h(i,j)
290 h(i,j) = c(i)*temp + s(i) * h(i+1,j)
291 h(i+1,j) = -s(i)*temp + c(i) * h(i+1,j)
292 end do
293
294 rnorm = 0.0_rp
295 if (abscmp(alpha, 0.0_xp)) then
296 conv = .true.
297 exit
298 end if
299
300 lr = sqrt(h(j,j) * h(j,j) + alpha2)
301 temp = 1.0_rp / lr
302 c(j) = h(j,j) * temp
303 s(j) = alpha * temp
304 h(j,j) = lr
305 gam(j+1) = -s(j) * gam(j)
306 gam(j) = c(j) * gam(j)
307 rnorm = abs(gam(j+1)) * norm_fac
308 call this%monitor_iter(iter, rnorm)
309 if (rnorm .lt. this%abs_tol) then
310 conv = .true.
311 exit
312 end if
313
314 if (iter + 1 .gt. max_iter) exit
315
316 if (j .lt. this%lgmres) then
317 temp = 1.0_rp / alpha
318 call cmult2(v(1,j+1), w, temp, n)
319 end if
320
321 end do
322
323 j = min(j, this%lgmres)
324 do k = j, 1, -1
325 temp = gam(k)
326 do i = j, k+1, -1
327 temp = temp - h(k,i) * c(i)
328 end do
329 c(k) = temp / h(k,k)
330 end do
331
332 do i = 0, n, neko_blk_size
333 if (i + neko_blk_size .le. n) then
334 do k = 1, neko_blk_size
335 x_plus(k) = 0.0_rp
336 end do
337 do l = 1,j
338 do k = 1, neko_blk_size
339 x_plus(k) = x_plus(k) + c(l) * z(i+k,l)
340 end do
341 end do
342 do k = 1, neko_blk_size
343 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k)
344 end do
345 else
346 do k = 1, n-i
347 x_plus(1) = 0.0_rp
348 do l = 1, j
349 x_plus(1) = x_plus(1) + c(l) * z(i+k,l)
350 end do
351 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(1)
352 end do
353 end if
354 end do
355 end do
356
357 end associate
358 call this%monitor_stop()
359 ksp_results%res_final = rnorm
360 ksp_results%iter = iter
361 ksp_results%converged = this%is_converged(iter, rnorm)
362
363 end function gmres_solve
364
366 function gmres_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
367 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
368 class(gmres_t), intent(inout) :: this
369 class(ax_t), intent(in) :: ax
370 type(field_t), intent(inout) :: x
371 type(field_t), intent(inout) :: y
372 type(field_t), intent(inout) :: z
373 integer, intent(in) :: n
374 real(kind=rp), dimension(n), intent(in) :: fx
375 real(kind=rp), dimension(n), intent(in) :: fy
376 real(kind=rp), dimension(n), intent(in) :: fz
377 type(coef_t), intent(inout) :: coef
378 type(bc_list_t), intent(inout) :: blstx
379 type(bc_list_t), intent(inout) :: blsty
380 type(bc_list_t), intent(inout) :: blstz
381 type(gs_t), intent(inout) :: gs_h
382 type(ksp_monitor_t), dimension(3) :: ksp_results
383 integer, optional, intent(in) :: niter
384
385 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
386 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
387 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
388
389 end function gmres_solve_coupled
390
391end module gmres
392
393
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:42
type(mpi_datatype), public mpi_extra_precision
Definition comm.F90:51
Defines a field.
Definition field.f90:34
Gather-scatter.
Defines various GMRES methods.
Definition gmres.f90:34
type(ksp_monitor_t) function gmres_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
Standard GMRES solve.
Definition gmres.f90:165
type(ksp_monitor_t) function, dimension(3) gmres_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard GMRES coupled solve.
Definition gmres.f90:368
subroutine gmres_free(this)
Deallocate a standard GMRES solver.
Definition gmres.f90:121
subroutine gmres_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a standard GMRES solver.
Definition gmres.f90:73
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
Definition math.f90:60
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
Definition math.f90:429
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1026
subroutine, public rone(a, n)
Set all elements to one.
Definition math.f90:244
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:255
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:211
subroutine, public sub2(a, b, n)
Vector substraction .
Definition math.f90:774
Build configurations.
integer, parameter neko_blk_size
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Krylov preconditioner.
Definition precon.f90:34
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Standard preconditioned generalized minimal residual method.
Definition gmres.f90:51
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:73
Defines a canonical Krylov preconditioner.
Definition precon.f90:40