Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
gmres.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2026, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module gmres
35 use krylov, only : ksp_t, ksp_monitor_t
36 use precon, only : pc_t
37 use ax_product, only : ax_t
38 use num_types, only: rp, xp
39 use field, only : field_t
40 use coefs, only : coef_t
41 use gather_scatter, only : gs_t, gs_op_add
42 use bc_list, only : bc_list_t
43 use math, only : glsc3, rzero, copy, sub2, cmult2, abscmp
44 use neko_config, only : neko_blk_size
46 use mpi_f08, only : mpi_allreduce, mpi_in_place, mpi_sum
47 implicit none
48 private
49
51 type, public, extends(ksp_t) :: gmres_t
52 integer :: lgmres = 30
53 real(kind=rp), allocatable :: w(:)
54 real(kind=rp), allocatable :: r(:)
55 real(kind=rp), allocatable :: z(:,:)
56 real(kind=rp), allocatable :: v(:,:)
58 real(kind=xp), allocatable :: h(:,:)
59 real(kind=xp), allocatable :: s(:)
60 real(kind=xp), allocatable :: gam(:)
61 real(kind=xp), allocatable :: c(:)
62 contains
63 procedure, pass(this) :: init => gmres_init
64 procedure, pass(this) :: free => gmres_free
65 procedure, pass(this) :: solve => gmres_solve
66 procedure, pass(this) :: solve_coupled => gmres_solve_coupled
67 end type gmres_t
68
69contains
70
72 subroutine gmres_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
73 class(gmres_t), target, intent(inout) :: this
74 integer, intent(in) :: n
75 integer, intent(in) :: max_iter
76 class(pc_t), optional, intent(in), target :: M
77 real(kind=rp), optional, intent(in) :: rel_tol
78 real(kind=rp), optional, intent(in) :: abs_tol
79 logical, optional, intent(in) :: monitor
80
81 call this%free()
82
83 if (present(m)) then
84 this%M => m
85 end if
86
87 allocate(this%w(n))
88 allocate(this%r(n))
89
90 allocate(this%c(this%lgmres))
91 allocate(this%s(this%lgmres))
92 allocate(this%gam(this%lgmres + 1))
93
94 allocate(this%z(n, this%lgmres))
95 allocate(this%v(n, this%lgmres))
96
97 allocate(this%h(this%lgmres, this%lgmres))
98
99 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
100 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
101 else if (present(rel_tol) .and. present(abs_tol)) then
102 call this%ksp_init(max_iter, rel_tol, abs_tol)
103 else if (present(monitor) .and. present(abs_tol)) then
104 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
105 else if (present(rel_tol) .and. present(monitor)) then
106 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
107 else if (present(rel_tol)) then
108 call this%ksp_init(max_iter, rel_tol = rel_tol)
109 else if (present(abs_tol)) then
110 call this%ksp_init(max_iter, abs_tol = abs_tol)
111 else if (present(monitor)) then
112 call this%ksp_init(max_iter, monitor = monitor)
113 else
114 call this%ksp_init(max_iter)
115 end if
116
117 end subroutine gmres_init
118
120 subroutine gmres_free(this)
121 class(gmres_t), intent(inout) :: this
122
123 call this%ksp_free()
124
125 if (allocated(this%w)) then
126 deallocate(this%w)
127 end if
128
129 if (allocated(this%c)) then
130 deallocate(this%c)
131 end if
132
133 if (allocated(this%r)) then
134 deallocate(this%r)
135 end if
136
137 if (allocated(this%z)) then
138 deallocate(this%z)
139 end if
140
141 if (allocated(this%h)) then
142 deallocate(this%h)
143 end if
144
145 if (allocated(this%v)) then
146 deallocate(this%v)
147 end if
148
149 if (allocated(this%s)) then
150 deallocate(this%s)
151 end if
152
153
154 if (allocated(this%gam)) then
155 deallocate(this%gam)
156 end if
157
158 nullify(this%M)
159
160 end subroutine gmres_free
161
163 function gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) &
164 result(ksp_results)
165 class(gmres_t), intent(inout) :: this
166 class(ax_t), intent(in) :: ax
167 type(field_t), intent(inout) :: x
168 integer, intent(in) :: n
169 real(kind=rp), dimension(n), intent(in) :: f
170 type(coef_t), intent(inout) :: coef
171 type(bc_list_t), intent(inout) :: blst
172 type(gs_t), intent(inout) :: gs_h
173 type(ksp_monitor_t) :: ksp_results
174 integer, optional, intent(in) :: niter
175 integer :: iter, max_iter
176 integer :: i, j, k, l, ierr, blk_size
177 real(kind=xp) :: w_plus(neko_blk_size), x_plus(neko_blk_size)
178 real(kind=xp) :: alpha, lr, alpha2, norm_fac
179 real(kind=xp) :: hl_priv(this%lgmres)
180 real(kind=rp) :: temp, rnorm
181 logical :: conv
182
183 conv = .false.
184 iter = 0
185 rnorm = 0.0_rp
186
187 if (present(niter)) then
188 max_iter = niter
189 else
190 max_iter = this%max_iter
191 end if
192
193 associate(w => this%w, c => this%c, r => this%r, z => this%z, h => this%h, &
194 v => this%v, s => this%s, gam => this%gam)
195
196 norm_fac = 1.0_rp / sqrt(coef%volume)
197 call rzero(x%x, n)
198 gam = 0.0_xp
199 s = 1.0_xp
200 c = 1.0_xp
201 h = 0.0_xp
202 call this%monitor_start('GMRES')
203 do while (.not. conv .and. iter .lt. max_iter)
204
205 if (iter .eq. 0) then
206 call copy(r, f, n)
207 else
208 call copy(r, f, n)
209 call ax%compute(w, x%x, coef, x%msh, x%Xh)
210 call gs_h%op(w, n, gs_op_add)
211 call blst%apply(w, n)
212 call sub2(r, w, n)
213 end if
214
215 gam(1) = sqrt(glsc3(r, r, coef%mult, n))
216 if (iter .eq. 0) then
217 ksp_results%res_start = gam(1) * norm_fac
218 end if
219
220 if (abscmp(gam(1), 0.0_xp)) exit
221
222 rnorm = 0.0_rp
223 temp = 1.0_rp / gam(1)
224 call cmult2(v(1,1), r, temp, n)
225 do j = 1, this%lgmres
226 iter = iter+1
227
228 call this%M%solve(z(1,j), v(1,j), n)
229
230 call ax%compute(w, z(1,j), coef, x%msh, x%Xh)
231 call gs_h%op(w, n, gs_op_add)
232 call blst%apply(w, n)
233
234 do l = 1, j
235 h(l,j) = 0.0_xp
236 end do
237
238 !$omp parallel private(k, l, hl_priv, blk_size)
239 do l = 1, j
240 hl_priv(l) = 0.0_xp
241 end do
242 !$omp do
243 do i = 0, n, neko_blk_size
244 blk_size = min(neko_blk_size, n - i)
245 do l = 1, j
246 do concurrent(k = 1:blk_size)
247 hl_priv(l) = hl_priv(l) + &
248 w(i+k) * v(i+k,l) * coef%mult(i+k,1,1,1)
249 end do
250 end do
251 end do
252 !$omp end do
253
254 do l = 1, j
255 !$omp atomic
256 h(l,j) = h(l,j) + hl_priv(l)
257 end do
258
259 !$omp end parallel
260 call mpi_allreduce(mpi_in_place, h(1,j), j, &
261 mpi_extra_precision, mpi_sum, neko_comm, ierr)
262
263 alpha2 = 0.0_rp
264 !$omp parallel do private(k, l, w_plus, blk_size) reduction(+:alpha2)
265 do i = 0, n, neko_blk_size
266 blk_size = min(neko_blk_size, n - i)
267 do concurrent(k = 1:blk_size)
268 w_plus(k) = 0.0_rp
269 end do
270 do l = 1, j
271 do concurrent(k = 1:blk_size)
272 w_plus(k) = w_plus(k) - h(l,j) * v(i+k,l)
273 end do
274 end do
275 do k = 1, blk_size
276 w(i+k) = w(i+k) + w_plus(k)
277 alpha2 = alpha2 + w(i+k)**2 * coef%mult(i+k,1,1,1)
278 end do
279 end do
280 !$omp end parallel do
281 call mpi_allreduce(mpi_in_place,alpha2, 1, &
282 mpi_extra_precision, mpi_sum, neko_comm, ierr)
283 alpha = sqrt(alpha2)
284 do i = 1, j-1
285 temp = h(i,j)
286 h(i,j) = c(i)*temp + s(i) * h(i+1,j)
287 h(i+1,j) = -s(i)*temp + c(i) * h(i+1,j)
288 end do
289
290 rnorm = 0.0_rp
291 if (abscmp(alpha, 0.0_xp)) then
292 conv = .true.
293 exit
294 end if
295
296 lr = sqrt(h(j,j) * h(j,j) + alpha2)
297 temp = 1.0_rp / lr
298 c(j) = h(j,j) * temp
299 s(j) = alpha * temp
300 h(j,j) = lr
301 gam(j+1) = -s(j) * gam(j)
302 gam(j) = c(j) * gam(j)
303 rnorm = abs(gam(j+1)) * norm_fac
304 call this%monitor_iter(iter, rnorm)
305 if (rnorm .lt. this%abs_tol) then
306 conv = .true.
307 exit
308 end if
309
310 if (iter + 1 .gt. max_iter) exit
311
312 if (j .lt. this%lgmres) then
313 temp = 1.0_rp / alpha
314 call cmult2(v(1,j+1), w, temp, n)
315 end if
316
317 end do
318
319 j = min(j, this%lgmres)
320 do k = j, 1, -1
321 temp = gam(k)
322 do i = j, k+1, -1
323 temp = temp - h(k,i) * c(i)
324 end do
325 c(k) = temp / h(k,k)
326 end do
327
328 !$omp parallel do private(k, l, x_plus, blk_size)
329 do i = 0, n, neko_blk_size
330 blk_size = min(neko_blk_size, n - i)
331 do concurrent(k = 1:blk_size)
332 x_plus(k) = 0.0_rp
333 end do
334 do l = 1,j
335 do concurrent(k = 1:blk_size)
336 x_plus(k) = x_plus(k) + c(l) * z(i+k,l)
337 end do
338 end do
339 do concurrent(k = 1:blk_size)
340 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k)
341 end do
342 end do
343 !$omp end parallel do
344 end do
345
346 end associate
347 call this%monitor_stop()
348 ksp_results%res_final = rnorm
349 ksp_results%iter = iter
350 ksp_results%converged = this%is_converged(iter, rnorm)
351
352 end function gmres_solve
353
355 function gmres_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
356 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
357 class(gmres_t), intent(inout) :: this
358 class(ax_t), intent(in) :: ax
359 type(field_t), intent(inout) :: x
360 type(field_t), intent(inout) :: y
361 type(field_t), intent(inout) :: z
362 integer, intent(in) :: n
363 real(kind=rp), dimension(n), intent(in) :: fx
364 real(kind=rp), dimension(n), intent(in) :: fy
365 real(kind=rp), dimension(n), intent(in) :: fz
366 type(coef_t), intent(inout) :: coef
367 type(bc_list_t), intent(inout) :: blstx
368 type(bc_list_t), intent(inout) :: blsty
369 type(bc_list_t), intent(inout) :: blstz
370 type(gs_t), intent(inout) :: gs_h
371 type(ksp_monitor_t), dimension(3) :: ksp_results
372 integer, optional, intent(in) :: niter
373
374 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
375 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
376 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
377
378 end function gmres_solve_coupled
379
380end module gmres
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:43
type(mpi_datatype), public mpi_extra_precision
Definition comm.F90:52
Defines a field.
Definition field.f90:34
Gather-scatter.
Defines various GMRES methods.
Definition gmres.f90:34
type(ksp_monitor_t) function gmres_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
Standard GMRES solve.
Definition gmres.f90:165
type(ksp_monitor_t) function, dimension(3) gmres_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard GMRES coupled solve.
Definition gmres.f90:357
subroutine gmres_free(this)
Deallocate a standard GMRES solver.
Definition gmres.f90:121
subroutine gmres_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a standard GMRES solver.
Definition gmres.f90:73
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
Definition math.f90:60
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
Definition math.f90:424
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1068
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:250
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:206
subroutine, public sub2(a, b, n)
Vector substraction .
Definition math.f90:769
Build configurations.
integer, parameter neko_blk_size
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Krylov preconditioner.
Definition precon.f90:34
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:62
Standard preconditioned generalized minimal residual method.
Definition gmres.f90:51
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:73
Defines a canonical Krylov preconditioner.
Definition precon.f90:40