Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
cheby.f90
Go to the documentation of this file.
1! Copyright (c) 2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module cheby
35 use krylov, only : ksp_t, ksp_monitor_t
36 use precon, only : pc_t
37 use ax_product, only : ax_t
38 use num_types, only: rp
39 use field, only : field_t
40 use coefs, only : coef_t
41 use mesh, only : mesh_t
42 use space, only : space_t
43 use gather_scatter, only : gs_t, gs_op_add
44 use bc_list, only : bc_list_t
45 use schwarz, only : schwarz_t
46 use math, only : glsc3, rzero, rone, copy, sub2, cmult2, abscmp, glsc2, &
48 implicit none
49 private
50
52 type, public, extends(ksp_t) :: cheby_t
53 real(kind=rp), allocatable :: d(:)
54 real(kind=rp), allocatable :: w(:)
55 real(kind=rp), allocatable :: r(:)
56 real(kind=rp) :: tha, dlt
57 integer :: power_its = 150
58 logical :: recompute_eigs = .true.
59 logical :: zero_initial_guess = .false.
60 type(schwarz_t), pointer :: schwarz => null()
61 contains
62 procedure, pass(this) :: init => cheby_init
63 procedure, pass(this) :: free => cheby_free
64 procedure, pass(this) :: solve => cheby_impl
65 procedure, pass(this) :: solve_coupled => cheby_solve_coupled
66 end type cheby_t
67
68contains
69
71 subroutine cheby_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
72 class(cheby_t), intent(inout), target :: this
73 integer, intent(in) :: max_iter
74 class(pc_t), optional, intent(in), target :: M
75 integer, intent(in) :: n
76 real(kind=rp), optional, intent(in) :: rel_tol
77 real(kind=rp), optional, intent(in) :: abs_tol
78 logical, optional, intent(in) :: monitor
79
80 call this%free()
81 allocate(this%d(n))
82 allocate(this%w(n))
83 allocate(this%r(n))
84
85 if (present(m)) then
86 this%M => m
87 end if
88
89 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
90 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
91 else if (present(rel_tol) .and. present(abs_tol)) then
92 call this%ksp_init(max_iter, rel_tol, abs_tol)
93 else if (present(monitor) .and. present(abs_tol)) then
94 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
95 else if (present(rel_tol) .and. present(monitor)) then
96 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
97 else if (present(rel_tol)) then
98 call this%ksp_init(max_iter, rel_tol = rel_tol)
99 else if (present(abs_tol)) then
100 call this%ksp_init(max_iter, abs_tol = abs_tol)
101 else if (present(monitor)) then
102 call this%ksp_init(max_iter, monitor = monitor)
103 else
104 call this%ksp_init(max_iter)
105 end if
106
107 end subroutine cheby_init
108
109 subroutine cheby_free(this)
110 class(cheby_t), intent(inout) :: this
111 if (allocated(this%d)) then
112 deallocate(this%d)
113 end if
114
115 if (allocated(this%w)) then
116 deallocate(this%w)
117 end if
118
119 if (allocated(this%r)) then
120 deallocate(this%r)
121 end if
122 end subroutine cheby_free
123
124 subroutine cheby_power(this, Ax, x, n, coef, blst, gs_h)
125 class(cheby_t), intent(inout) :: this
126 class(ax_t), intent(in) :: Ax
127 type(field_t), intent(inout) :: x
128 integer, intent(in) :: n
129 type(coef_t), intent(inout) :: coef
130 type(bc_list_t), intent(inout) :: blst
131 type(gs_t), intent(inout) :: gs_h
132 real(kind=rp) :: lam, b, a, rn
133 real(kind=rp) :: boost = 1.1_rp
134 real(kind=rp) :: lam_factor = 30.0_rp
135 real(kind=rp) :: wtw, dtw, dtd
136 integer :: i
137 associate(w => this%w, d => this%d, r => this%r)
138
139 do i = 1, n
140 !TODO: replace with a better way to initialize power method
141 call random_number(rn)
142 d(i) = rn + 10.0_rp
143 end do
144 call gs_h%op(d, n, gs_op_add)
145 call blst%apply(d, n)
146
147 !Power method to get lamba max
148 do i = 1, this%power_its
149 call ax%compute(w, d, coef, x%msh, x%Xh)
150 call gs_h%op(w, n, gs_op_add)
151 call blst%apply(w, n)
152 if (associated(this%schwarz)) then
153 call this%schwarz%compute(r, w)
154 call copy(w, r, n)
155 else
156 call this%M%solve(r, w, n)
157 call copy(w, r, n)
158 end if
159
160 wtw = glsc3(w, coef%mult, w, n)
161 call cmult2(d, w, 1.0_rp/sqrt(wtw), n)
162 call blst%apply(d, n)
163 end do
164
165 call ax%compute(w, d, coef, x%msh, x%Xh)
166 call gs_h%op(w, n, gs_op_add)
167 call blst%apply(w, n)
168 if (associated(this%schwarz)) then
169 call this%schwarz%compute(r, w)
170 call copy(w, r, n)
171 else
172 call this%M%solve(r, w, n)
173 call copy(w, r, n)
174 end if
175
176 dtw = glsc3(d, coef%mult, w, n)
177 dtd = glsc3(d, coef%mult, d, n)
178 lam = dtw / dtd
179 b = lam * boost
180 a = lam / lam_factor
181 this%tha = (b+a)/2.0_rp
182 this%dlt = (b-a)/2.0_rp
183
184 this%recompute_eigs = .false.
185 end associate
186 end subroutine cheby_power
187
189 function cheby_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) &
190 result(ksp_results)
191 class(cheby_t), intent(inout) :: this
192 class(ax_t), intent(in) :: ax
193 type(field_t), intent(inout) :: x
194 integer, intent(in) :: n
195 real(kind=rp), dimension(n), intent(in) :: f
196 type(coef_t), intent(inout) :: coef
197 type(bc_list_t), intent(inout) :: blst
198 type(gs_t), intent(inout) :: gs_h
199 type(ksp_monitor_t) :: ksp_results
200 integer, optional, intent(in) :: niter
201 integer :: iter, max_iter
202 real(kind=rp) :: a, b, rtr, rnorm, norm_fac
203
204 if (this%recompute_eigs) then
205 call cheby_power(this, ax, x, n, coef, blst, gs_h)
206 end if
207
208 if (present(niter)) then
209 max_iter = niter
210 else
211 max_iter = this%max_iter
212 end if
213 norm_fac = 1.0_rp / sqrt(coef%volume)
214
215 associate( w => this%w, r => this%r, d => this%d)
216 ! calculate residual
217 call copy(r, f, n)
218 call ax%compute(w, x%x, coef, x%msh, x%Xh)
219 call gs_h%op(w, n, gs_op_add)
220 call blst%apply(w, n)
221 call sub2(r, w, n)
222
223 rtr = glsc3(r, coef%mult, r, n)
224 rnorm = sqrt(rtr) * norm_fac
225 ksp_results%res_start = rnorm
226 ksp_results%res_final = rnorm
227 ksp_results%iter = 0
228
229 ! First iteration
230 call this%M%solve(w, r, n)
231 call copy(d, w, n)
232 a = 2.0_rp / this%tha
233 call add2s2(x%x, d, a, n)! x = x + a*d
234
235 ! Rest of the iterations
236 do iter = 2, max_iter
237 ! calculate residual
238 call copy(r, f, n)
239 call ax%compute(w, x%x, coef, x%msh, x%Xh)
240 call gs_h%op(w, n, gs_op_add)
241 call blst%apply(w, n)
242 call sub2(r, w, n)
243
244 call this%M%solve(w, r, n)
245
246 if (iter .eq. 2) then
247 b = 0.5_rp * (this%dlt * a)**2
248 else
249 b = (this%dlt * a / 2.0_rp)**2
250 end if
251 a = 1.0_rp/(this%tha - b/a)
252 call add2s1(d, w, b, n)! d = w + b*d
253
254 call add2s2(x%x, d, a, n)! x = x + a*d
255 end do
256
257 ! calculate residual
258 call copy(r, f, n)
259 call ax%compute(w, x%x, coef, x%msh, x%Xh)
260 call gs_h%op(w, n, gs_op_add)
261 call blst%apply(w, n)
262 call sub2(r, w, n)
263 rtr = glsc3(r, coef%mult, r, n)
264 rnorm = sqrt(rtr) * norm_fac
265 ksp_results%res_final = rnorm
266 ksp_results%iter = iter
267 ksp_results%converged = this%is_converged(iter, rnorm)
268 end associate
269 end function cheby_solve
270
272 function cheby_impl(this, Ax, x, f, n, coef, blst, gs_h, niter) &
273 result(ksp_results)
274 class(cheby_t), intent(inout) :: this
275 class(ax_t), intent(in) :: ax
276 type(field_t), intent(inout) :: x
277 integer, intent(in) :: n
278 real(kind=rp), dimension(n), intent(in) :: f
279 type(coef_t), intent(inout) :: coef
280 type(bc_list_t), intent(inout) :: blst
281 type(gs_t), intent(inout) :: gs_h
282 type(ksp_monitor_t) :: ksp_results
283 integer, optional, intent(in) :: niter
284 integer :: iter, max_iter, i
285 real(kind=rp) :: a, b, rtr, rnorm, norm_fac
286 real(kind=rp) :: rhok, rhokp1, sig1, tmp1, tmp2
287
288 if (this%recompute_eigs) then
289 call cheby_power(this, ax, x, n, coef, blst, gs_h)
290 end if
291
292 if (present(niter)) then
293 max_iter = niter
294 else
295 max_iter = this%max_iter
296 end if
297 norm_fac = 1.0_rp / sqrt(coef%volume)
298
299 associate( w => this%w, r => this%r, d => this%d)
300 ! calculate residual
301 if (.not.this%zero_initial_guess) then
302 call ax%compute(w, x%x, coef, x%msh, x%Xh)
303 call gs_h%op(w, n, gs_op_add)
304 call blst%apply(w, n)
305 call sub3(r, f, w, n)
306 else
307 call copy(r, f, n)
308 this%zero_initial_guess = .false.
309 end if
310
311 ! First iteration
312 if (associated(this%schwarz)) then
313 call this%schwarz%compute(d, r)
314 else
315 call this%M%solve(d, r, n)
316 end if
317
318 do concurrent(i = 1:n)
319 d(i) = 1.0_rp/this%tha * d(i)
320 x%x(i,1,1,1) = x%x(i,1,1,1) + d(i)
321 end do
322
323 sig1 = this%tha / this%dlt
324 rhok = 1.0_rp / sig1
325
326 ! Rest of the iterations
327 do iter = 2, max_iter
328 rhokp1 = 1.0_rp / (2.0_rp * sig1 - rhok)
329 tmp1 = rhokp1 * rhok
330 tmp2 = 2.0_rp * rhokp1 / this%dlt
331 rhok = rhokp1
332 ! calculate residual
333 call ax%compute(w, x%x, coef, x%msh, x%Xh)
334 call gs_h%op(w, n, gs_op_add)
335 call blst%apply(w, n)
336 call sub3(r, f, w, n)
337
338 if (associated(this%schwarz)) then
339 call this%schwarz%compute(w, r)
340 else
341 call this%M%solve(w, r, n)
342 end if
343 do concurrent(i = 1:n)
344 d(i) = tmp1 * d(i) + tmp2 * w(i)
345 x%x(i,1,1,1) = x%x(i,1,1,1) + d(i)
346 end do
347 end do
348
349 end associate
350 end function cheby_impl
351
353 function cheby_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
354 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
355 class(cheby_t), intent(inout) :: this
356 class(ax_t), intent(in) :: ax
357 type(field_t), intent(inout) :: x
358 type(field_t), intent(inout) :: y
359 type(field_t), intent(inout) :: z
360 integer, intent(in) :: n
361 real(kind=rp), dimension(n), intent(in) :: fx
362 real(kind=rp), dimension(n), intent(in) :: fy
363 real(kind=rp), dimension(n), intent(in) :: fz
364 type(coef_t), intent(inout) :: coef
365 type(bc_list_t), intent(inout) :: blstx
366 type(bc_list_t), intent(inout) :: blsty
367 type(bc_list_t), intent(inout) :: blstz
368 type(gs_t), intent(inout) :: gs_h
369 type(ksp_monitor_t), dimension(3) :: ksp_results
370 integer, optional, intent(in) :: niter
371
372 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
373 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
374 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
375
376 end function cheby_solve_coupled
377
378end module cheby
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Chebyshev preconditioner.
Definition cheby.f90:34
type(ksp_monitor_t) function cheby_impl(this, ax, x, f, n, coef, blst, gs_h, niter)
A chebyshev preconditioner.
Definition cheby.f90:274
subroutine cheby_free(this)
Definition cheby.f90:110
type(ksp_monitor_t) function, dimension(3) cheby_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard Chebyshev coupled solve.
Definition cheby.f90:355
subroutine cheby_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a standard solver.
Definition cheby.f90:72
subroutine cheby_power(this, ax, x, n, coef, blst, gs_h)
Definition cheby.f90:125
type(ksp_monitor_t) function cheby_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
A chebyshev preconditioner.
Definition cheby.f90:191
Coefficients.
Definition coef.f90:34
Defines a field.
Definition field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
Definition math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition math.f90:411
subroutine, public cmult2(a, b, c, n)
Multiplication by constant c .
Definition math.f90:423
real(kind=rp) function, public glsc3(a, b, c, n)
Weighted inner product .
Definition math.f90:1067
subroutine, public add2s1(a, b, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
Definition math.f90:797
real(kind=rp) function, public glsc2(a, b, n)
Weighted inner product .
Definition math.f90:1048
subroutine, public rone(a, n)
Set all elements to one.
Definition math.f90:238
subroutine, public sub3(a, b, c, n)
Vector subtraction .
Definition math.f90:781
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:726
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:249
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205
subroutine, public sub2(a, b, n)
Vector substraction .
Definition math.f90:768
subroutine, public add2s2(a, b, c1, n)
Vector addition with scalar multiplication (multiplication on second argument)
Definition math.f90:812
Defines a mesh.
Definition mesh.f90:34
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Krylov preconditioner.
Definition precon.f90:34
Overlapping schwarz solves.
Definition schwarz.f90:61
Defines a function space.
Definition space.f90:34
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
Defines a Chebyshev preconditioner.
Definition cheby.f90:52
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:56
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:73
Defines a canonical Krylov preconditioner.
Definition precon.f90:40
The function space for the SEM solution fields.
Definition space.f90:63