Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
gmres.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module gmres
35 use krylov, only : ksp_t, ksp_monitor_t
36 use precon, only : pc_t
37 use ax_product, only : ax_t
38 use num_types, only: rp, xp
39 use field, only : field_t
40 use coefs, only : coef_t
41 use gather_scatter, only : gs_t, gs_op_add
42 use bc_list, only : bc_list_t
43 use math, only : glsc3, rzero, rone, copy, sub2, cmult2, abscmp
44 use comm
45 implicit none
46 private
47
49 type, public, extends(ksp_t) :: gmres_t
50 integer :: lgmres
51 real(kind=rp), allocatable :: w(:)
52 real(kind=rp), allocatable :: r(:)
53 real(kind=rp), allocatable :: z(:,:)
54 real(kind=rp), allocatable :: v(:,:)
56 real(kind=xp), allocatable :: h(:,:)
57 real(kind=xp), allocatable :: s(:)
58 real(kind=xp), allocatable :: gam(:)
59 real(kind=xp), allocatable :: c(:)
60 contains
61 procedure, pass(this) :: init => gmres_init
62 procedure, pass(this) :: free => gmres_free
63 procedure, pass(this) :: solve => gmres_solve
64 procedure, pass(this) :: solve_coupled => gmres_solve_coupled
65 end type gmres_t
66
67contains
68
70 subroutine gmres_init(this, n, max_iter, M, lgmres, &
71 rel_tol, abs_tol, monitor)
72 class(gmres_t), intent(inout) :: this
73 integer, intent(in) :: n
74 integer, intent(in) :: max_iter
75 class(pc_t), optional, intent(in), target :: M
76 integer, optional, intent(in) :: lgmres
77 real(kind=rp), optional, intent(in) :: rel_tol
78 real(kind=rp), optional, intent(in) :: abs_tol
79 logical, optional, intent(in) :: monitor
80
81 if (present(lgmres)) then
82 this%lgmres = lgmres
83 else
84 this%lgmres = 30
85 end if
86
87
88 call this%free()
89
90 if (present(m)) then
91 this%M => m
92 end if
93
94 allocate(this%w(n))
95 allocate(this%r(n))
96
97 allocate(this%c(this%lgmres))
98 allocate(this%s(this%lgmres))
99 allocate(this%gam(this%lgmres + 1))
100
101 allocate(this%z(n, this%lgmres))
102 allocate(this%v(n, this%lgmres))
103
104 allocate(this%h(this%lgmres, this%lgmres))
105
106 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
107 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
108 else if (present(rel_tol) .and. present(abs_tol)) then
109 call this%ksp_init(max_iter, rel_tol, abs_tol)
110 else if (present(monitor) .and. present(abs_tol)) then
111 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
112 else if (present(rel_tol) .and. present(monitor)) then
113 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
114 else if (present(rel_tol)) then
115 call this%ksp_init(max_iter, rel_tol = rel_tol)
116 else if (present(abs_tol)) then
117 call this%ksp_init(max_iter, abs_tol = abs_tol)
118 else if (present(monitor)) then
119 call this%ksp_init(max_iter, monitor = monitor)
120 else
121 call this%ksp_init(max_iter)
122 end if
123
124 end subroutine gmres_init
125
127 subroutine gmres_free(this)
128 class(gmres_t), intent(inout) :: this
129
130 call this%ksp_free()
131
132 if (allocated(this%w)) then
133 deallocate(this%w)
134 end if
135
136 if (allocated(this%c)) then
137 deallocate(this%c)
138 end if
139
140 if (allocated(this%r)) then
141 deallocate(this%r)
142 end if
143
144 if (allocated(this%z)) then
145 deallocate(this%z)
146 end if
147
148 if (allocated(this%h)) then
149 deallocate(this%h)
150 end if
151
152 if (allocated(this%v)) then
153 deallocate(this%v)
154 end if
155
156 if (allocated(this%s)) then
157 deallocate(this%s)
158 end if
159
160
161 if (allocated(this%gam)) then
162 deallocate(this%gam)
163 end if
164
165 nullify(this%M)
166
167 end subroutine gmres_free
168
170 function gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) &
171 result(ksp_results)
172 class(gmres_t), intent(inout) :: this
173 class(ax_t), intent(in) :: ax
174 type(field_t), intent(inout) :: x
175 integer, intent(in) :: n
176 real(kind=rp), dimension(n), intent(in) :: f
177 type(coef_t), intent(inout) :: coef
178 type(bc_list_t), intent(inout) :: blst
179 type(gs_t), intent(inout) :: gs_h
180 type(ksp_monitor_t) :: ksp_results
181 integer, optional, intent(in) :: niter
182 integer :: iter, max_iter
183 integer :: i, j, k, l, ierr
184 real(kind=xp) :: w_plus(neko_blk_size), x_plus(neko_blk_size)
185 real(kind=xp) :: alpha, lr, alpha2, norm_fac
186 real(kind=rp) :: temp, rnorm
187 logical :: conv
188
189 conv = .false.
190 iter = 0
191
192 if (present(niter)) then
193 max_iter = niter
194 else
195 max_iter = this%max_iter
196 end if
197
198 associate(w => this%w, c => this%c, r => this%r, z => this%z, h => this%h, &
199 v => this%v, s => this%s, gam => this%gam)
200
201 norm_fac = 1.0_rp / sqrt(coef%volume)
202 call rzero(x%x, n)
203 gam = 0.0_xp
204 s = 1.0_xp
205 c = 1.0_xp
206 h = 0.0_xp
207 call this%monitor_start('GMRES')
208 do while (.not. conv .and. iter .lt. max_iter)
209
210 if (iter .eq. 0) then
211 call copy(r, f, n)
212 else
213 call copy(r, f, n)
214 call ax%compute(w, x%x, coef, x%msh, x%Xh)
215 call gs_h%op(w, n, gs_op_add)
216 call blst%apply(w, n)
217 call sub2(r, w, n)
218 end if
219
220 gam(1) = sqrt(glsc3(r, r, coef%mult, n))
221 if (iter .eq. 0) then
222 ksp_results%res_start = gam(1) * norm_fac
223 end if
224
225 if (abscmp(gam(1), 0.0_xp)) return
226
227 rnorm = 0.0_rp
228 temp = 1.0_rp / gam(1)
229 call cmult2(v(1,1), r, temp, n)
230 do j = 1, this%lgmres
231 iter = iter+1
232
233 call this%M%solve(z(1,j), v(1,j), n)
234
235 call ax%compute(w, z(1,j), coef, x%msh, x%Xh)
236 call gs_h%op(w, n, gs_op_add)
237 call blst%apply(w, n)
238
239 do l = 1, j
240 h(l,j) = 0.0_rp
241 end do
242
243 do i = 0, n, neko_blk_size
244 if (i + neko_blk_size .le. n) then
245 do l = 1, j
246 do k = 1, neko_blk_size
247 h(l,j) = h(l,j) + &
248 w(i+k) * v(i+k,l) * coef%mult(i+k,1,1,1)
249 end do
250 end do
251 else
252 do k = 1, n-i
253 do l = 1, j
254 h(l,j) = h(l,j) + &
255 w(i+k) * v(i+k,l) * coef%mult(i+k,1,1,1)
256 end do
257 end do
258 end if
259 end do
260
261 call mpi_allreduce(mpi_in_place, h(1,j), j, &
262 mpi_extra_precision, mpi_sum, neko_comm, ierr)
263
264 alpha2 = 0.0_rp
265 do i = 0, n, neko_blk_size
266 if (i + neko_blk_size .le. n) then
267 do k = 1, neko_blk_size
268 w_plus(k) = 0.0_rp
269 end do
270 do l = 1,j
271 do k = 1, neko_blk_size
272 w_plus(k) = w_plus(k) - h(l,j) * v(i+k,l)
273 end do
274 end do
275 do k = 1, neko_blk_size
276 w(i+k) = w(i+k) + w_plus(k)
277 alpha2 = alpha2 + w(i+k)**2 * coef%mult(i+k,1,1,1)
278 end do
279 else
280 do k = 1, n-i
281 w_plus(1) = 0.0_rp
282 do l = 1, j
283 w_plus(1) = w_plus(1) - h(l,j) * v(i+k,l)
284 end do
285 w(i+k) = w(i+k) + w_plus(1)
286 alpha2 = alpha2 + (w(i+k)**2) * coef%mult(i+k,1,1,1)
287 end do
288 end if
289 end do
290
291 call mpi_allreduce(mpi_in_place,alpha2, 1, &
292 mpi_extra_precision, mpi_sum, neko_comm, ierr)
293 alpha = sqrt(alpha2)
294 do i = 1, j-1
295 temp = h(i,j)
296 h(i,j) = c(i)*temp + s(i) * h(i+1,j)
297 h(i+1,j) = -s(i)*temp + c(i) * h(i+1,j)
298 end do
299
300 rnorm = 0.0_rp
301 if (abscmp(alpha, 0.0_xp)) then
302 conv = .true.
303 exit
304 end if
305
306 lr = sqrt(h(j,j) * h(j,j) + alpha2)
307 temp = 1.0_rp / lr
308 c(j) = h(j,j) * temp
309 s(j) = alpha * temp
310 h(j,j) = lr
311 gam(j+1) = -s(j) * gam(j)
312 gam(j) = c(j) * gam(j)
313 rnorm = abs(gam(j+1)) * norm_fac
314 call this%monitor_iter(iter, rnorm)
315 if (rnorm .lt. this%abs_tol) then
316 conv = .true.
317 exit
318 end if
319
320 if (iter + 1 .gt. max_iter) exit
321
322 if (j .lt. this%lgmres) then
323 temp = 1.0_rp / alpha
324 call cmult2(v(1,j+1), w, temp, n)
325 end if
326
327 end do
328
329 j = min(j, this%lgmres)
330 do k = j, 1, -1
331 temp = gam(k)
332 do i = j, k+1, -1
333 temp = temp - h(k,i) * c(i)
334 end do
335 c(k) = temp / h(k,k)
336 end do
337
338 do i = 0, n, neko_blk_size
339 if (i + neko_blk_size .le. n) then
340 do k = 1, neko_blk_size
341 x_plus(k) = 0.0_rp
342 end do
343 do l = 1,j
344 do k = 1, neko_blk_size
345 x_plus(k) = x_plus(k) + c(l) * z(i+k,l)
346 end do
347 end do
348 do k = 1, neko_blk_size
349 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k)
350 end do
351 else
352 do k = 1, n-i
353 x_plus(1) = 0.0_rp
354 do l = 1, j
355 x_plus(1) = x_plus(1) + c(l) * z(i+k,l)
356 end do
357 x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(1)
358 end do
359 end if
360 end do
361 end do
362
363 end associate
364 call this%monitor_stop()
365 ksp_results%res_final = rnorm
366 ksp_results%iter = iter
367 ksp_results%converged = this%is_converged(iter, rnorm)
368
369 end function gmres_solve
370
372 function gmres_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
373 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
374 class(gmres_t), intent(inout) :: this
375 class(ax_t), intent(in) :: ax
376 type(field_t), intent(inout) :: x
377 type(field_t), intent(inout) :: y
378 type(field_t), intent(inout) :: z
379 integer, intent(in) :: n
380 real(kind=rp), dimension(n), intent(in) :: fx
381 real(kind=rp), dimension(n), intent(in) :: fy
382 real(kind=rp), dimension(n), intent(in) :: fz
383 type(coef_t), intent(inout) :: coef
384 type(bc_list_t), intent(inout) :: blstx
385 type(bc_list_t), intent(inout) :: blsty
386 type(bc_list_t), intent(inout) :: blstz
387 type(gs_t), intent(inout) :: gs_h
388 type(ksp_monitor_t), dimension(3) :: ksp_results
389 integer, optional, intent(in) :: niter
390
391 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
392 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
393 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
394
395 end function gmres_solve_coupled
396
397end module gmres
398
399
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_comm) neko_comm
MPI communicator.
Definition comm.F90:16
type(mpi_datatype) mpi_extra_precision
Definition comm.F90:24
Defines a field.
Definition field.f90:34
Gather-scatter.
Defines various GMRES methods.
Definition gmres.f90:34
subroutine gmres_init(this, n, max_iter, m, lgmres, rel_tol, abs_tol, monitor)
Initialise a standard GMRES solver.
Definition gmres.f90:72
type(ksp_monitor_t) function gmres_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
Standard GMRES solve.
Definition gmres.f90:172
type(ksp_monitor_t) function, dimension(3) gmres_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard GMRES coupled solve.
Definition gmres.f90:374
subroutine gmres_free(this)
Deallocate a standard GMRES solver.
Definition gmres.f90:128
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
Definition math.f90:60
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
Definition math.f90:700
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:894
subroutine, public rone(a, n)
Set all elements to one.
Definition math.f90:227
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:238
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:194
subroutine, public sub2(a, b, n)
Vector substraction .
Definition math.f90:628
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Krylov preconditioner.
Definition precon.f90:34
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:46
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Standard preconditioned generalized minimal residual method.
Definition gmres.f90:49
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:68
Defines a canonical Krylov preconditioner.
Definition precon.f90:40