Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
gmres_sx.f90
Go to the documentation of this file.
1! Copyright (c) 2021-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use krylov, only : ksp_t, ksp_monitor_t
36 use precon, only : pc_t
37 use ax_product, only : ax_t
38 use num_types, only: rp
39 use field, only : field_t
40 use coefs, only : coef_t
41 use gather_scatter, only : gs_t, gs_op_add
42 use bc_list, only : bc_list_t
43 use math, only : glsc3, rzero, rone, copy, cmult2, col2, col3, add2s2, abscmp
45 use mpi_f08
46 implicit none
47 private
48
50 type, public, extends(ksp_t) :: sx_gmres_t
51 integer :: lgmres = 30
52 real(kind=rp), allocatable :: w(:)
53 real(kind=rp), allocatable :: c(:)
54 real(kind=rp), allocatable :: r(:)
55 real(kind=rp), allocatable :: z(:,:)
56 real(kind=rp), allocatable :: h(:,:)
57 real(kind=rp), allocatable :: ml(:)
58 real(kind=rp), allocatable :: v(:,:)
59 real(kind=rp), allocatable :: s(:)
60 real(kind=rp), allocatable :: mu(:)
61 real(kind=rp), allocatable :: gam(:)
62 real(kind=rp), allocatable :: wk1(:)
63 real(kind=rp) :: rnorm
64 contains
65 procedure, pass(this) :: init => sx_gmres_init
66 procedure, pass(this) :: free => sx_gmres_free
67 procedure, pass(this) :: solve => sx_gmres_solve
68 procedure, pass(this) :: solve_coupled => sx_gmres_solve_coupled
69 end type sx_gmres_t
70
71contains
72
74 subroutine sx_gmres_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
75 class(sx_gmres_t), target, intent(inout) :: this
76 integer, intent(in) :: n
77 integer, intent(in) :: max_iter
78 class(pc_t), optional, intent(in), target :: M
79 real(kind=rp), optional, intent(in) :: rel_tol
80 real(kind=rp), optional, intent(in) :: abs_tol
81 logical, optional, intent(in) :: monitor
82
83 call this%free()
84
85 if (present(m)) then
86 this%M => m
87 end if
88
89 allocate(this%w(n))
90 allocate(this%r(n))
91 allocate(this%ml(n))
92 allocate(this%mu(n))
93 allocate(this%wk1(n))
94
95 allocate(this%c(this%lgmres))
96 allocate(this%s(this%lgmres))
97 allocate(this%gam(this%lgmres + 1))
98
99 allocate(this%z(n,this%lgmres))
100 allocate(this%v(n,this%lgmres))
101
102 allocate(this%h(this%lgmres,this%lgmres))
103
104
105 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
106 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
107 else if (present(rel_tol) .and. present(abs_tol)) then
108 call this%ksp_init(max_iter, rel_tol, abs_tol)
109 else if (present(monitor) .and. present(abs_tol)) then
110 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
111 else if (present(rel_tol) .and. present(monitor)) then
112 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
113 else if (present(rel_tol)) then
114 call this%ksp_init(max_iter, rel_tol = rel_tol)
115 else if (present(abs_tol)) then
116 call this%ksp_init(max_iter, abs_tol = abs_tol)
117 else if (present(monitor)) then
118 call this%ksp_init(max_iter, monitor = monitor)
119 else
120 call this%ksp_init(max_iter)
121 end if
122
123 end subroutine sx_gmres_init
124
126 subroutine sx_gmres_free(this)
127 class(sx_gmres_t), intent(inout) :: this
128
129 call this%ksp_free()
130
131 if (allocated(this%w)) then
132 deallocate(this%w)
133 end if
134
135 if (allocated(this%c)) then
136 deallocate(this%c)
137 end if
138
139 if (allocated(this%r)) then
140 deallocate(this%r)
141 end if
142
143 if (allocated(this%z)) then
144 deallocate(this%z)
145 end if
146
147 if (allocated(this%h)) then
148 deallocate(this%h)
149 end if
150
151 if (allocated(this%ml)) then
152 deallocate(this%ml)
153 end if
154
155 if (allocated(this%v)) then
156 deallocate(this%v)
157 end if
158
159 if (allocated(this%s)) then
160 deallocate(this%s)
161 end if
162
163 if (allocated(this%mu)) then
164 deallocate(this%mu)
165 end if
166
167 if (allocated(this%gam)) then
168 deallocate(this%gam)
169 end if
170
171 if (allocated(this%wk1)) then
172 deallocate(this%wk1)
173 end if
174
175 nullify(this%M)
176
177 end subroutine sx_gmres_free
178
180 function sx_gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
181 class(sx_gmres_t), intent(inout) :: this
182 class(ax_t), intent(in) :: ax
183 type(field_t), intent(inout) :: x
184 integer, intent(in) :: n
185 real(kind=rp), dimension(n), intent(in) :: f
186 type(coef_t), intent(inout) :: coef
187 type(bc_list_t), intent(inout) :: blst
188 type(gs_t), intent(inout) :: gs_h
189 type(ksp_monitor_t) :: ksp_results
190 integer, optional, intent(in) :: niter
191 integer :: iter, max_iter, glb_n
192 integer :: i, j, k, ierr
193 real(kind=rp), parameter :: one = 1.0
194 real(kind=rp) :: rnorm
195 real(kind=rp) :: alpha, temp, l
196 real(kind=rp) :: ratio, div0, norm_fac
197 logical :: conv
198 integer outer
199
200 conv = .false.
201 iter = 0
202 rnorm = 0.0_rp
203 glb_n = n / x%msh%nelv * x%msh%glb_nelv
204
205 if (present(niter)) then
206 max_iter = niter
207 else
208 max_iter = this%max_iter
209 end if
210
211 call rone(this%ml, n)
212 call rone(this%mu, n)
213 norm_fac = one / sqrt(coef%volume)
214 call rzero(x%x, n)
215 call rzero(this%gam, this%lgmres + 1)
216 call rone(this%s, this%lgmres)
217 call rone(this%c, this%lgmres)
218 call rzero(this%h, this%lgmres * this%lgmres)
219 outer = 0
220 call this%monitor_start('GMRES')
221 do while (.not. conv .and. iter .lt. max_iter)
222 outer = outer + 1
223
224 if(iter.eq.0) then
225 call col3(this%r,this%ml,f,n)
226 else
227 !update residual
228 call copy (this%r,f,n)
229 call ax%compute(this%w, x%x, coef, x%msh, x%Xh)
230 call gs_h%op(this%w, n, gs_op_add)
231 call blst%apply(this%w, n)
232 call add2s2(this%r,this%w,-one,n)
233 call col2(this%r,this%ml,n)
234 endif
235 this%gam(1) = sqrt(glsc3(this%r, this%r, coef%mult, n))
236 if(iter.eq.0) then
237 div0 = this%gam(1) * norm_fac
238 ksp_results%res_start = div0
239 endif
240
241 if (abscmp(this%gam(1), 0.0_rp)) exit
242
243 rnorm = 0.0_rp
244 temp = one / this%gam(1)
245 call cmult2(this%v(1,1), this%r, temp, n)
246 do j = 1, this%lgmres
247 iter = iter+1
248 call col3(this%w, this%mu, this%v(1,j), n)
249
250 !Apply precond
251 call this%M%solve(this%z(1,j), this%w, n)
252
253 call ax%compute(this%w, this%z(1,j), coef, x%msh, x%Xh)
254 call gs_h%op(this%w, n, gs_op_add)
255 call blst%apply(this%w, n)
256 call col2(this%w, this%ml, n)
257
258 do i = 1, j
259 this%h(i,j) = 0.0_rp
260 do k = 1, n
261 this%h(i,j) = this%h(i,j) + &
262 this%w(k) * this%v(k,i) * coef%mult(k,1,1,1)
263 end do
264 end do
265
266 !Could probably be done inplace...
267 call mpi_allreduce(this%h(1,j), this%wk1, j, &
268 mpi_real_precision, mpi_sum, neko_comm, ierr)
269 call copy(this%h(1,j), this%wk1, j)
270
271 do i = 1, j
272 do k = 1, n
273 this%w(k) = this%w(k) - this%h(i,j) * this%v(k,i)
274 end do
275 end do
276
277 !apply Givens rotations to new column
278 do i=1,j-1
279 temp = this%h(i,j)
280 this%h(i ,j) = this%c(i)*temp + this%s(i)*this%h(i+1,j)
281 this%h(i+1,j) = -this%s(i)*temp + this%c(i)*this%h(i+1,j)
282 end do
283
284 alpha = sqrt(glsc3(this%w, this%w, coef%mult, n))
285 rnorm = 0.0_rp
286 if(abscmp(alpha, 0.0_rp)) then
287 conv = .true.
288 exit
289 end if
290 l = sqrt(this%h(j,j) * this%h(j,j) + alpha**2)
291 temp = one / l
292 this%c(j) = this%h(j,j) * temp
293 this%s(j) = alpha * temp
294 this%h(j,j) = l
295 this%gam(j+1) = -this%s(j) * this%gam(j)
296 this%gam(j) = this%c(j) * this%gam(j)
297
298 rnorm = abs(this%gam(j+1)) * norm_fac
299 call this%monitor_iter(iter, rnorm)
300 ratio = rnorm / div0
301 if (rnorm .lt. this%abs_tol) then
302 conv = .true.
303 exit
304 end if
305
306 if (iter + 1 .gt. max_iter) exit
307
308 if( j .lt. this%lgmres) then
309 temp = one / alpha
310 call cmult2(this%v(1,j+1), this%w, temp, n)
311 endif
312 end do
313 j = min(j, this%lgmres)
314 !back substitution
315 do k = j, 1, -1
316 temp = this%gam(k)
317 do i = j, k+1, -1
318 temp = temp - this%h(k,i) * this%c(i)
319 enddo
320 this%c(k) = temp / this%h(k,k)
321 enddo
322 !sum up Arnoldi vectors
323 do i = 1, j
324 do k = 1, n
325 x%x(k,1,1,1) = x%x(k,1,1,1) + this%c(i) * this%z(k,i)
326 end do
327 end do
328 end do
329 call this%monitor_stop()
330 ksp_results%res_final = rnorm
331 ksp_results%iter = iter
332 ksp_results%converged = this%is_converged(iter, rnorm)
333 end function sx_gmres_solve
334
336 function sx_gmres_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
337 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
338 class(sx_gmres_t), intent(inout) :: this
339 class(ax_t), intent(in) :: ax
340 type(field_t), intent(inout) :: x
341 type(field_t), intent(inout) :: y
342 type(field_t), intent(inout) :: z
343 integer, intent(in) :: n
344 real(kind=rp), dimension(n), intent(in) :: fx
345 real(kind=rp), dimension(n), intent(in) :: fy
346 real(kind=rp), dimension(n), intent(in) :: fz
347 type(coef_t), intent(inout) :: coef
348 type(bc_list_t), intent(inout) :: blstx
349 type(bc_list_t), intent(inout) :: blsty
350 type(bc_list_t), intent(inout) :: blstz
351 type(gs_t), intent(inout) :: gs_h
352 type(ksp_monitor_t), dimension(3) :: ksp_results
353 integer, optional, intent(in) :: niter
354
355 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
356 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
357 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
358
359 end function sx_gmres_solve_coupled
360
361end module gmres_sx
362
363
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:50
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:42
Defines a field.
Definition field.f90:34
Gather-scatter.
Defines various GMRES methods.
Definition gmres_sx.f90:34
subroutine sx_gmres_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a standard GMRES solver.
Definition gmres_sx.f90:75
type(ksp_monitor_t) function, dimension(3) sx_gmres_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard GMRES coupled solve.
Definition gmres_sx.f90:338
subroutine sx_gmres_free(this)
Deallocate a standard GMRES solver.
Definition gmres_sx.f90:127
type(ksp_monitor_t) function sx_gmres_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
Standard PCG solve.
Definition gmres_sx.f90:181
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
Definition math.f90:60
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
Definition math.f90:429
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1026
subroutine, public rone(a, n)
Set all elements to one.
Definition math.f90:244
subroutine, public col2(a, b, n)
Vector multiplication .
Definition math.f90:860
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:255
subroutine, public col3(a, b, c, n)
Vector multiplication with 3 vectors .
Definition math.f90:873
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:211
subroutine, public add2s2(a, b, c1, n)
Vector addition with scalar multiplication (multiplication on second argument)
Definition math.f90:818
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Krylov preconditioner.
Definition precon.f90:34
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Standard preconditioned generalized minimal residual method (SX version)
Definition gmres_sx.f90:50
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:73
Defines a canonical Krylov preconditioner.
Definition precon.f90:40