Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
cheby.f90
Go to the documentation of this file.
1! Copyright (c) 2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module cheby
35 use krylov, only : ksp_t, ksp_monitor_t
36 use precon, only : pc_t
37 use ax_product, only : ax_t
38 use num_types, only: rp
39 use field, only : field_t
40 use coefs, only : coef_t
41 use mesh, only : mesh_t
42 use space, only : space_t
43 use gather_scatter, only : gs_t, gs_op_add
44 use bc_list, only : bc_list_t
45 use schwarz, only : schwarz_t
46 use math, only : glsc3, rzero, rone, copy, sub2, cmult2, abscmp, glsc2, &
48 implicit none
49 private
50
52 type, public, extends(ksp_t) :: cheby_t
53 real(kind=rp), allocatable :: d(:)
54 real(kind=rp), allocatable :: w(:)
55 real(kind=rp), allocatable :: r(:)
56 real(kind=rp) :: tha, dlt
57 integer :: power_its = 150
58 logical :: recompute_eigs = .true.
59 logical :: zero_initial_guess = .false.
60 type(schwarz_t), pointer :: schwarz => null()
61 contains
62 procedure, pass(this) :: init => cheby_init
63 procedure, pass(this) :: free => cheby_free
64 procedure, pass(this) :: solve => cheby_impl
65 procedure, pass(this) :: solve_coupled => cheby_solve_coupled
66 end type cheby_t
67
68contains
69
71 subroutine cheby_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
72 class(cheby_t), intent(inout), target :: this
73 integer, intent(in) :: max_iter
74 class(pc_t), optional, intent(in), target :: M
75 integer, intent(in) :: n
76 real(kind=rp), optional, intent(in) :: rel_tol
77 real(kind=rp), optional, intent(in) :: abs_tol
78 logical, optional, intent(in) :: monitor
79
80 call this%free()
81 allocate(this%d(n))
82 allocate(this%w(n))
83 allocate(this%r(n))
84
85 if (present(m)) then
86 this%M => m
87 end if
88
89 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
90 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
91 else if (present(rel_tol) .and. present(abs_tol)) then
92 call this%ksp_init(max_iter, rel_tol, abs_tol)
93 else if (present(monitor) .and. present(abs_tol)) then
94 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
95 else if (present(rel_tol) .and. present(monitor)) then
96 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
97 else if (present(rel_tol)) then
98 call this%ksp_init(max_iter, rel_tol = rel_tol)
99 else if (present(abs_tol)) then
100 call this%ksp_init(max_iter, abs_tol = abs_tol)
101 else if (present(monitor)) then
102 call this%ksp_init(max_iter, monitor = monitor)
103 else
104 call this%ksp_init(max_iter)
105 end if
106
107 end subroutine cheby_init
108
109 subroutine cheby_free(this)
110 class(cheby_t), intent(inout) :: this
111 if (allocated(this%d)) then
112 deallocate(this%d)
113 end if
114 end subroutine cheby_free
115
116 subroutine cheby_power(this, Ax, x, n, coef, blst, gs_h)
117 class(cheby_t), intent(inout) :: this
118 class(ax_t), intent(in) :: Ax
119 type(field_t), intent(inout) :: x
120 integer, intent(in) :: n
121 type(coef_t), intent(inout) :: coef
122 type(bc_list_t), intent(inout) :: blst
123 type(gs_t), intent(inout) :: gs_h
124 real(kind=rp) :: lam, b, a, rn
125 real(kind=rp) :: boost = 1.1_rp
126 real(kind=rp) :: lam_factor = 30.0_rp
127 real(kind=rp) :: wtw, dtw, dtd
128 integer :: i
129 associate(w => this%w, d => this%d, r => this%r)
130
131 do i = 1, n
132 !TODO: replace with a better way to initialize power method
133 call random_number(rn)
134 d(i) = rn + 10.0_rp
135 end do
136 call gs_h%op(d, n, gs_op_add)
137 call blst%apply(d, n)
138
139 !Power method to get lamba max
140 do i = 1, this%power_its
141 call ax%compute(w, d, coef, x%msh, x%Xh)
142 call gs_h%op(w, n, gs_op_add)
143 call blst%apply(w, n)
144 if (associated(this%schwarz)) then
145 call this%schwarz%compute(r, w)
146 call copy(w, r, n)
147 else
148 call this%M%solve(r, w, n)
149 call copy(w, r, n)
150 end if
151
152 wtw = glsc3(w, coef%mult, w, n)
153 call cmult2(d, w, 1.0_rp/sqrt(wtw), n)
154 call blst%apply(d, n)
155 end do
156
157 call ax%compute(w, d, coef, x%msh, x%Xh)
158 call gs_h%op(w, n, gs_op_add)
159 call blst%apply(w, n)
160 if (associated(this%schwarz)) then
161 call this%schwarz%compute(r, w)
162 call copy(w, r, n)
163 else
164 call this%M%solve(r, w, n)
165 call copy(w, r, n)
166 end if
167
168 dtw = glsc3(d, coef%mult, w, n)
169 dtd = glsc3(d, coef%mult, d, n)
170 lam = dtw / dtd
171 b = lam * boost
172 a = lam / lam_factor
173 this%tha = (b+a)/2.0_rp
174 this%dlt = (b-a)/2.0_rp
175
176 this%recompute_eigs = .false.
177 end associate
178 end subroutine cheby_power
179
181 function cheby_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) &
182 result(ksp_results)
183 class(cheby_t), intent(inout) :: this
184 class(ax_t), intent(in) :: ax
185 type(field_t), intent(inout) :: x
186 integer, intent(in) :: n
187 real(kind=rp), dimension(n), intent(in) :: f
188 type(coef_t), intent(inout) :: coef
189 type(bc_list_t), intent(inout) :: blst
190 type(gs_t), intent(inout) :: gs_h
191 type(ksp_monitor_t) :: ksp_results
192 integer, optional, intent(in) :: niter
193 integer :: iter, max_iter
194 real(kind=rp) :: a, b, rtr, rnorm, norm_fac
195
196 if (this%recompute_eigs) then
197 call cheby_power(this, ax, x, n, coef, blst, gs_h)
198 end if
199
200 if (present(niter)) then
201 max_iter = niter
202 else
203 max_iter = this%max_iter
204 end if
205 norm_fac = 1.0_rp / sqrt(coef%volume)
206
207 associate( w => this%w, r => this%r, d => this%d)
208 ! calculate residual
209 call copy(r, f, n)
210 call ax%compute(w, x%x, coef, x%msh, x%Xh)
211 call gs_h%op(w, n, gs_op_add)
212 call blst%apply(w, n)
213 call sub2(r, w, n)
214
215 rtr = glsc3(r, coef%mult, r, n)
216 rnorm = sqrt(rtr) * norm_fac
217 ksp_results%res_start = rnorm
218 ksp_results%res_final = rnorm
219 ksp_results%iter = 0
220
221 ! First iteration
222 call this%M%solve(w, r, n)
223 call copy(d, w, n)
224 a = 2.0_rp / this%tha
225 call add2s2(x%x, d, a, n)! x = x + a*d
226
227 ! Rest of the iterations
228 do iter = 2, max_iter
229 ! calculate residual
230 call copy(r, f, n)
231 call ax%compute(w, x%x, coef, x%msh, x%Xh)
232 call gs_h%op(w, n, gs_op_add)
233 call blst%apply(w, n)
234 call sub2(r, w, n)
235
236 call this%M%solve(w, r, n)
237
238 if (iter .eq. 2) then
239 b = 0.5_rp * (this%dlt * a)**2
240 else
241 b = (this%dlt * a / 2.0_rp)**2
242 end if
243 a = 1.0_rp/(this%tha - b/a)
244 call add2s1(d, w, b, n)! d = w + b*d
245
246 call add2s2(x%x, d, a, n)! x = x + a*d
247 end do
248
249 ! calculate residual
250 call copy(r, f, n)
251 call ax%compute(w, x%x, coef, x%msh, x%Xh)
252 call gs_h%op(w, n, gs_op_add)
253 call blst%apply(w, n)
254 call sub2(r, w, n)
255 rtr = glsc3(r, coef%mult, r, n)
256 rnorm = sqrt(rtr) * norm_fac
257 ksp_results%res_final = rnorm
258 ksp_results%iter = iter
259 ksp_results%converged = this%is_converged(iter, rnorm)
260 end associate
261 end function cheby_solve
262
264 function cheby_impl(this, Ax, x, f, n, coef, blst, gs_h, niter) &
265 result(ksp_results)
266 class(cheby_t), intent(inout) :: this
267 class(ax_t), intent(in) :: ax
268 type(field_t), intent(inout) :: x
269 integer, intent(in) :: n
270 real(kind=rp), dimension(n), intent(in) :: f
271 type(coef_t), intent(inout) :: coef
272 type(bc_list_t), intent(inout) :: blst
273 type(gs_t), intent(inout) :: gs_h
274 type(ksp_monitor_t) :: ksp_results
275 integer, optional, intent(in) :: niter
276 integer :: iter, max_iter
277 real(kind=rp) :: a, b, rtr, rnorm, norm_fac
278 real(kind=rp) :: rhok, rhokp1, sig1, tmp1, tmp2
279
280 if (this%recompute_eigs) then
281 call cheby_power(this, ax, x, n, coef, blst, gs_h)
282 end if
283
284 if (present(niter)) then
285 max_iter = niter
286 else
287 max_iter = this%max_iter
288 end if
289 norm_fac = 1.0_rp / sqrt(coef%volume)
290
291 associate( w => this%w, r => this%r, d => this%d)
292 ! calculate residual
293 if (.not.this%zero_initial_guess) then
294 call ax%compute(w, x%x, coef, x%msh, x%Xh)
295 call gs_h%op(w, n, gs_op_add)
296 call blst%apply(w, n)
297 call sub3(r, f, w, n)
298 else
299 call copy(r, f, n)
300 this%zero_initial_guess = .false.
301 end if
302
303 ! First iteration
304 if (associated(this%schwarz)) then
305 call this%schwarz%compute(d, r)
306 else
307 call this%M%solve(d, r, n)
308 end if
309 call cmult( d, (1.0_rp / this%tha), n)
310 call add2( x%x, d, n)
311
312 sig1 = this%tha / this%dlt
313 rhok = 1.0_rp / sig1
314
315 ! Rest of the iterations
316 do iter = 2, max_iter
317 rhokp1 = 1.0_rp / (2.0_rp * sig1 - rhok)
318 tmp1 = rhokp1 * rhok
319 tmp2 = 2.0_rp * rhokp1 / this%dlt
320 rhok = rhokp1
321 ! calculate residual
322 call ax%compute(w, x%x, coef, x%msh, x%Xh)
323 call gs_h%op(w, n, gs_op_add)
324 call blst%apply(w, n)
325 call sub3(r, f, w, n)
326
327 if (associated(this%schwarz)) then
328 call this%schwarz%compute(w, r)
329 else
330 call this%M%solve(w, r, n)
331 end if
332 call cmult( d, tmp1, n)
333 call add2s2( d, w, tmp2, n)
334 call add2( x%x, d, n)
335 end do
336
337 end associate
338 end function cheby_impl
339
341 function cheby_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
342 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
343 class(cheby_t), intent(inout) :: this
344 class(ax_t), intent(in) :: ax
345 type(field_t), intent(inout) :: x
346 type(field_t), intent(inout) :: y
347 type(field_t), intent(inout) :: z
348 integer, intent(in) :: n
349 real(kind=rp), dimension(n), intent(in) :: fx
350 real(kind=rp), dimension(n), intent(in) :: fy
351 real(kind=rp), dimension(n), intent(in) :: fz
352 type(coef_t), intent(inout) :: coef
353 type(bc_list_t), intent(inout) :: blstx
354 type(bc_list_t), intent(inout) :: blsty
355 type(bc_list_t), intent(inout) :: blstz
356 type(gs_t), intent(inout) :: gs_h
357 type(ksp_monitor_t), dimension(3) :: ksp_results
358 integer, optional, intent(in) :: niter
359
360 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
361 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
362 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
363
364 end function cheby_solve_coupled
365
366end module cheby
367
368
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Chebyshev preconditioner.
Definition cheby.f90:34
type(ksp_monitor_t) function cheby_impl(this, ax, x, f, n, coef, blst, gs_h, niter)
A chebyshev preconditioner.
Definition cheby.f90:266
subroutine cheby_free(this)
Definition cheby.f90:110
type(ksp_monitor_t) function, dimension(3) cheby_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard Chebyshev coupled solve.
Definition cheby.f90:343
subroutine cheby_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a standard solver.
Definition cheby.f90:72
subroutine cheby_power(this, ax, x, n, coef, blst, gs_h)
Definition cheby.f90:117
type(ksp_monitor_t) function cheby_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
A chebyshev preconditioner.
Definition cheby.f90:183
Coefficients.
Definition coef.f90:34
Defines a field.
Definition field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
Definition math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition math.f90:417
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
Definition math.f90:429
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1026
subroutine, public add2s1(a, b, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
Definition math.f90:803
real(kind=rp) function, public glsc2(a, b, n)
Weighted inner product .
Definition math.f90:1007
subroutine, public rone(a, n)
Set all elements to one.
Definition math.f90:244
subroutine, public sub3(a, b, c, n)
Vector subtraction .
Definition math.f90:787
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:732
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:255
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:211
subroutine, public sub2(a, b, n)
Vector substraction .
Definition math.f90:774
subroutine, public add2s2(a, b, c1, n)
Vector addition with scalar multiplication (multiplication on second argument)
Definition math.f90:818
Defines a mesh.
Definition mesh.f90:34
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Krylov preconditioner.
Definition precon.f90:34
Overlapping schwarz solves.
Definition schwarz.f90:61
Defines a function space.
Definition space.f90:34
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
Defines a Chebyshev preconditioner.
Definition cheby.f90:52
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:73
Defines a canonical Krylov preconditioner.
Definition precon.f90:40
The function space for the SEM solution fields.
Definition space.f90:62