Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
gmres.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2026, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module gmres
35 !$ use omp_lib
36 use krylov, only : ksp_t, ksp_monitor_t
37 use precon, only : pc_t
38 use ax_product, only : ax_t
39 use num_types, only: rp, xp
40 use field, only : field_t
41 use coefs, only : coef_t
42 use gather_scatter, only : gs_t, gs_op_add
43 use bc_list, only : bc_list_t
44 use math, only : glsc3, rzero, copy, sub2, cmult2, abscmp
45 use neko_config, only : neko_blk_size
47 use mpi_f08, only : mpi_allreduce, mpi_in_place, mpi_sum
48 implicit none
49 private
50
52 type, public, extends(ksp_t) :: gmres_t
53 integer :: lgmres = 30
54 real(kind=rp), allocatable :: w(:)
55 real(kind=rp), allocatable :: r(:)
56 real(kind=rp), allocatable :: z(:,:)
57 real(kind=rp), allocatable :: v(:,:)
59 real(kind=xp), allocatable :: h(:,:)
60 real(kind=xp), allocatable :: s(:)
61 real(kind=xp), allocatable :: gam(:)
62 real(kind=xp), allocatable :: c(:)
64 real(kind=xp), allocatable :: hp(:,:)
65 contains
66 procedure, pass(this) :: init => gmres_init
67 procedure, pass(this) :: free => gmres_free
68 procedure, pass(this) :: solve => gmres_solve
69 procedure, pass(this) :: solve_coupled => gmres_solve_coupled
70 end type gmres_t
71
72contains
73
75 subroutine gmres_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
76 class(gmres_t), target, intent(inout) :: this
77 integer, intent(in) :: n
78 integer, intent(in) :: max_iter
79 class(pc_t), optional, intent(in), target :: M
80 real(kind=rp), optional, intent(in) :: rel_tol
81 real(kind=rp), optional, intent(in) :: abs_tol
82 logical, optional, intent(in) :: monitor
83 integer :: nthrds
84
85 call this%free()
86
87 if (present(m)) then
88 this%M => m
89 end if
90
91 allocate(this%w(n))
92 allocate(this%r(n))
93
94 allocate(this%c(this%lgmres))
95 allocate(this%s(this%lgmres))
96 allocate(this%gam(this%lgmres + 1))
97
98 allocate(this%z(n, this%lgmres))
99 allocate(this%v(n, this%lgmres))
100
101 allocate(this%h(this%lgmres, this%lgmres))
102
103 nthrds = 1
104 !$ nthrds = omp_get_max_threads()
105 allocate(this%hp(this%lgmres, nthrds))
106
107 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
108 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
109 else if (present(rel_tol) .and. present(abs_tol)) then
110 call this%ksp_init(max_iter, rel_tol, abs_tol)
111 else if (present(monitor) .and. present(abs_tol)) then
112 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
113 else if (present(rel_tol) .and. present(monitor)) then
114 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
115 else if (present(rel_tol)) then
116 call this%ksp_init(max_iter, rel_tol = rel_tol)
117 else if (present(abs_tol)) then
118 call this%ksp_init(max_iter, abs_tol = abs_tol)
119 else if (present(monitor)) then
120 call this%ksp_init(max_iter, monitor = monitor)
121 else
122 call this%ksp_init(max_iter)
123 end if
124
125 end subroutine gmres_init
126
128 subroutine gmres_free(this)
129 class(gmres_t), intent(inout) :: this
130
131 call this%ksp_free()
132
133 if (allocated(this%w)) then
134 deallocate(this%w)
135 end if
136
137 if (allocated(this%c)) then
138 deallocate(this%c)
139 end if
140
141 if (allocated(this%r)) then
142 deallocate(this%r)
143 end if
144
145 if (allocated(this%z)) then
146 deallocate(this%z)
147 end if
148
149 if (allocated(this%h)) then
150 deallocate(this%h)
151 end if
152
153 if (allocated(this%v)) then
154 deallocate(this%v)
155 end if
156
157 if (allocated(this%s)) then
158 deallocate(this%s)
159 end if
160
161
162 if (allocated(this%gam)) then
163 deallocate(this%gam)
164 end if
165
166 if (allocated(this%hp)) then
167 deallocate(this%hp)
168 end if
169
170 nullify(this%M)
171
172 end subroutine gmres_free
173
175 function gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) &
176 result(ksp_results)
177 class(gmres_t), intent(inout) :: this
178 class(ax_t), intent(in) :: ax
179 type(field_t), intent(inout) :: x
180 integer, intent(in) :: n
181 real(kind=rp), dimension(n), intent(in) :: f
182 type(coef_t), intent(inout) :: coef
183 type(bc_list_t), intent(inout) :: blst
184 type(gs_t), intent(inout) :: gs_h
185 type(ksp_monitor_t) :: ksp_results
186 integer, optional, intent(in) :: niter
187 integer :: iter, max_iter
188 integer :: i, j, k, l, ierr, tid, nthrds
189 real(kind=xp) :: w_plus(neko_blk_size), x_plus(neko_blk_size)
190 real(kind=xp) :: alpha, lr, alpha2, norm_fac, tmp, acc
191 real(kind=rp) :: temp, rnorm
192 logical :: conv
193
194 conv = .false.
195 iter = 0
196 rnorm = 0.0_rp
197
198 if (present(niter)) then
199 max_iter = niter
200 else
201 max_iter = this%max_iter
202 end if
203
204 nthrds = 1
205 !$ nthrds = omp_get_max_threads()
206
207 associate(w => this%w, c => this%c, r => this%r, z => this%z, h => this%h, &
208 v => this%v, s => this%s, gam => this%gam, hp => this%hp)
209
210 norm_fac = 1.0_rp / sqrt(coef%volume)
211 call rzero(x%x, n)
212 gam = 0.0_xp
213 s = 1.0_xp
214 c = 1.0_xp
215 h = 0.0_xp
216 call this%monitor_start('GMRES')
217 do while (.not. conv .and. iter .lt. max_iter)
218
219 if (iter .eq. 0) then
220 call copy(r, f, n)
221 else
222 call copy(r, f, n)
223 call ax%compute(w, x%x, coef, x%msh, x%Xh)
224 call gs_h%op(w, n, gs_op_add)
225 call blst%apply(w, n)
226 call sub2(r, w, n)
227 end if
228
229 gam(1) = sqrt(glsc3(r, r, coef%mult, n))
230 if (iter .eq. 0) then
231 ksp_results%res_start = gam(1) * norm_fac
232 end if
233
234 if (abscmp(gam(1), 0.0_xp)) exit
235
236 rnorm = 0.0_rp
237 temp = 1.0_rp / gam(1)
238 call cmult2(v(1,1), r, temp, n)
239 do j = 1, this%lgmres
240 iter = iter+1
241
242 call this%M%solve(z(1,j), v(1,j), n)
243
244 call ax%compute(w, z(1,j), coef, x%msh, x%Xh)
245 call gs_h%op(w, n, gs_op_add)
246 call blst%apply(w, n)
247
248 ! Classical Gram-Schmidt orthogonalization: accumulate
249 ! <w, v_l>_mult for l=1..j into per-thread columns of hp,
250 ! merge across threads, then one MPI_Allreduce of length j.
251 !$omp parallel private(i, k, l, tid, acc)
252 tid = 1
253 !$ tid = omp_get_thread_num() + 1
254 do l = 1, j
255 hp(l,tid) = 0.0_xp
256 end do
257 !$omp do
258 do i = 0, n-1, neko_blk_size
259 if (i + neko_blk_size .le. n) then
260 do l = 1, j
261 acc = hp(l,tid)
262 !$omp simd reduction(+:acc)
263 do k = 1, neko_blk_size
264 acc = acc + &
265 w(i+k) * v(i+k,l) * coef%mult(i+k,1,1,1)
266 end do
267 hp(l,tid) = acc
268 end do
269 else
270 do l = 1, j
271 do k = 1, n - i
272 hp(l,tid) = hp(l,tid) + &
273 w(i+k) * v(i+k,l) * coef%mult(i+k,1,1,1)
274 end do
275 end do
276 end if
277 end do
278 !$omp end do
279 !$omp end parallel
280
281 ! Cross-thread merge into hp(:, 1), then one Allreduce of j.
282 do k = 2, nthrds
283 do l = 1, j
284 hp(l,1) = hp(l,1) + hp(l,k)
285 end do
286 end do
287 call mpi_allreduce(mpi_in_place, hp(1,1), j, &
288 mpi_extra_precision, mpi_sum, neko_comm, ierr)
289
290 do l = 1, j
291 h(l,j) = hp(l,1)
292 end do
293
294 ! Projection w = w - sum h(l,j) v_l, with fused <w,w>_mult
295 ! reduction for the post-projection norm. alpha2 is computed
296 ! directly (not by Pythagoras) so accuracy is preserved when
297 ! the Krylov subspace is becoming invariant.
298 alpha2 = 0.0_xp
299 !$omp parallel do private(k, l, w_plus, tmp) reduction(+:alpha2)
300 do i = 0, n-1, neko_blk_size
301 if (i + neko_blk_size .le. n) then
302 !$omp simd
303 do k = 1, neko_blk_size
304 w_plus(k) = 0.0_xp
305 end do
306 do l = 1, j
307 !$omp simd
308 do k = 1, neko_blk_size
309 w_plus(k) = w_plus(k) - h(l,j) * v(i+k,l)
310 end do
311 end do
312 do k = 1, neko_blk_size
313 w(i+k) = w(i+k) + w_plus(k)
314 alpha2 = alpha2 + w(i+k)**2 * coef%mult(i+k,1,1,1)
315 end do
316 else
317 do k = 1, n - i
318 tmp = 0.0_xp
319 do l = 1, j
320 tmp = tmp - h(l,j) * v(i+k,l)
321 end do
322 w(i+k) = w(i+k) + tmp
323 alpha2 = alpha2 + w(i+k)**2 * coef%mult(i+k,1,1,1)
324 end do
325 end if
326 end do
327 !$omp end parallel do
328 call mpi_allreduce(mpi_in_place, alpha2, 1, &
329 mpi_extra_precision, mpi_sum, neko_comm, ierr)
330 alpha = sqrt(alpha2)
331 do i = 1, j-1
332 temp = h(i,j)
333 h(i,j) = c(i)*temp + s(i) * h(i+1,j)
334 h(i+1,j) = -s(i)*temp + c(i) * h(i+1,j)
335 end do
336
337 rnorm = 0.0_rp
338 if (abscmp(alpha, 0.0_xp)) then
339 conv = .true.
340 exit
341 end if
342
343 lr = sqrt(h(j,j) * h(j,j) + alpha2)
344 temp = 1.0_rp / lr
345 c(j) = h(j,j) * temp
346 s(j) = alpha * temp
347 h(j,j) = lr
348 gam(j+1) = -s(j) * gam(j)
349 gam(j) = c(j) * gam(j)
350 rnorm = abs(gam(j+1)) * norm_fac
351 call this%monitor_iter(iter, rnorm)
352 if (rnorm .lt. this%abs_tol) then
353 conv = .true.
354 exit
355 end if
356
357 if (iter + 1 .gt. max_iter) exit
358
359 if (j .lt. this%lgmres) then
360 temp = 1.0_rp / alpha
361 call cmult2(v(1,j+1), w, temp, n)
362 end if
363
364 end do
365
366 j = min(j, this%lgmres)
367 do k = j, 1, -1
368 temp = gam(k)
369 do i = j, k+1, -1
370 temp = temp - h(k,i) * c(i)
371 end do
372 c(k) = temp / h(k,k)
373 end do
374
375 !$omp parallel do private(k, l, x_plus, tmp)
376 do i = 0, n-1, neko_blk_size
377 if (i + neko_blk_size .le. n) then
378 !$omp simd
379 do k = 1, neko_blk_size
380 x_plus(k) = 0.0_xp
381 end do
382 do l = 1, j
383 !$omp simd
384 do k = 1, neko_blk_size
385 x_plus(k) = x_plus(k) + c(l) * z(i+k,l)
386 end do
387 end do
388 !$omp simd
389 do k = 1, neko_blk_size
390 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k)
391 end do
392 else
393 do k = 1, n - i
394 tmp = 0.0_xp
395 do l = 1, j
396 tmp = tmp + c(l) * z(i+k,l)
397 end do
398 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + tmp
399 end do
400 end if
401 end do
402 !$omp end parallel do
403 end do
404
405 end associate
406 call this%monitor_stop()
407 ksp_results%res_final = rnorm
408 ksp_results%iter = iter
409 ksp_results%converged = this%is_converged(iter, rnorm)
410
411 end function gmres_solve
412
414 function gmres_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
415 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
416 class(gmres_t), intent(inout) :: this
417 class(ax_t), intent(in) :: ax
418 type(field_t), intent(inout) :: x
419 type(field_t), intent(inout) :: y
420 type(field_t), intent(inout) :: z
421 integer, intent(in) :: n
422 real(kind=rp), dimension(n), intent(in) :: fx
423 real(kind=rp), dimension(n), intent(in) :: fy
424 real(kind=rp), dimension(n), intent(in) :: fz
425 type(coef_t), intent(inout) :: coef
426 type(bc_list_t), intent(inout) :: blstx
427 type(bc_list_t), intent(inout) :: blsty
428 type(bc_list_t), intent(inout) :: blstz
429 type(gs_t), intent(inout) :: gs_h
430 type(ksp_monitor_t), dimension(3) :: ksp_results
431 integer, optional, intent(in) :: niter
432
433 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
434 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
435 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
436
437 end function gmres_solve_coupled
438
439end module gmres
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:44
type(mpi_datatype), public mpi_extra_precision
Definition comm.F90:53
Defines a field.
Definition field.f90:34
Gather-scatter.
Defines various GMRES methods.
Definition gmres.f90:34
type(ksp_monitor_t) function gmres_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
Standard GMRES solve.
Definition gmres.f90:177
type(ksp_monitor_t) function, dimension(3) gmres_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard GMRES coupled solve.
Definition gmres.f90:416
subroutine gmres_free(this)
Deallocate a standard GMRES solver.
Definition gmres.f90:129
subroutine gmres_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a standard GMRES solver.
Definition gmres.f90:76
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
Definition math.f90:60
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
Definition math.f90:517
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1285
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:289
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:233
subroutine, public sub2(a, b, n)
Vector substraction .
Definition math.f90:946
Build configurations.
integer, parameter neko_blk_size
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Krylov preconditioner.
Definition precon.f90:34
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:49
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:63
Standard preconditioned generalized minimal residual method.
Definition gmres.f90:52
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:73
Defines a canonical Krylov preconditioner.
Definition precon.f90:40