Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
cheby_device.F90
Go to the documentation of this file.
1! Copyright (c) 2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use krylov, only : ksp_t, ksp_monitor_t
36 use precon, only : pc_t
37 use ax_product, only : ax_t
38 use num_types, only: rp
39 use field, only : field_t
40 use coefs, only : coef_t
41 use mesh, only : mesh_t
42 use space, only : space_t
43 use gather_scatter, only : gs_t, gs_op_add
44 use bc_list, only : bc_list_t
45 use schwarz, only : schwarz_t
49 use device
50 use, intrinsic :: iso_c_binding, only : c_ptr, c_null_ptr, c_associated
51 implicit none
52 private
53
55 type, public, extends(ksp_t) :: cheby_device_t
56 real(kind=rp), allocatable :: d(:)
57 real(kind=rp), allocatable :: w(:)
58 real(kind=rp), allocatable :: r(:)
59 type(c_ptr) :: d_d = c_null_ptr
60 type(c_ptr) :: w_d = c_null_ptr
61 type(c_ptr) :: r_d = c_null_ptr
62 type(c_ptr) :: gs_event = c_null_ptr
63 real(kind=rp) :: tha, dlt
64 integer :: power_its = 150
65 logical :: recompute_eigs = .true.
66 logical :: zero_initial_guess = .false.
67 type(schwarz_t), pointer :: schwarz => null()
68 contains
69 procedure, pass(this) :: init => cheby_device_init
70 procedure, pass(this) :: free => cheby_device_free
71 procedure, pass(this) :: solve => cheby_device_impl
72 procedure, pass(this) :: solve_coupled => cheby_device_solve_coupled
73 end type cheby_device_t
74
75contains
76
78 subroutine cheby_device_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
79 class(cheby_device_t), intent(inout), target :: this
80 integer, intent(in) :: max_iter
81 class(pc_t), optional, intent(in), target :: M
82 integer, intent(in) :: n
83 real(kind=rp), optional, intent(in) :: rel_tol
84 real(kind=rp), optional, intent(in) :: abs_tol
85 logical, optional, intent(in) :: monitor
86
87 call this%free()
88 allocate(this%d(n))
89 allocate(this%w(n))
90 allocate(this%r(n))
91
92 call device_map(this%d, this%d_d, n)
93 call device_map(this%w, this%w_d, n)
94 call device_map(this%r, this%r_d, n)
95
96 if (present(m)) then
97 this%M => m
98 end if
99
100 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
101 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
102 else if (present(rel_tol) .and. present(abs_tol)) then
103 call this%ksp_init(max_iter, rel_tol, abs_tol)
104 else if (present(monitor) .and. present(abs_tol)) then
105 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
106 else if (present(rel_tol) .and. present(monitor)) then
107 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
108 else if (present(rel_tol)) then
109 call this%ksp_init(max_iter, rel_tol = rel_tol)
110 else if (present(abs_tol)) then
111 call this%ksp_init(max_iter, abs_tol = abs_tol)
112 else if (present(monitor)) then
113 call this%ksp_init(max_iter, monitor = monitor)
114 else
115 call this%ksp_init(max_iter)
116 end if
117
118 call device_event_create(this%gs_event, 2)
119
120 end subroutine cheby_device_init
121
122 subroutine cheby_device_free(this)
123 class(cheby_device_t), intent(inout) :: this
124
125 call this%ksp_free()
126
127 if (allocated(this%d)) then
128 deallocate(this%d)
129 end if
130
131 if (allocated(this%w)) then
132 deallocate(this%w)
133 end if
134
135 if (allocated(this%r)) then
136 deallocate(this%r)
137 end if
138
139 nullify(this%M)
140
141 if (c_associated(this%d_d)) then
142 call device_free(this%d_d)
143 end if
144
145 if (c_associated(this%w_d)) then
146 call device_free(this%w_d)
147 end if
148
149 if (c_associated(this%r_d)) then
150 call device_free(this%r_d)
151 end if
152
153 if (c_associated(this%gs_event)) then
154 call device_event_destroy(this%gs_event)
155 end if
156
157 end subroutine cheby_device_free
158
159 subroutine cheby_device_power(this, Ax, x, n, coef, blst, gs_h)
160 class(cheby_device_t), intent(inout) :: this
161 class(ax_t), intent(in) :: Ax
162 type(field_t), intent(inout) :: x
163 integer, intent(in) :: n
164 type(coef_t), intent(inout) :: coef
165 type(bc_list_t), intent(inout) :: blst
166 type(gs_t), intent(inout) :: gs_h
167 real(kind=rp) :: lam, b, a, rn
168 real(kind=rp) :: boost = 1.1_rp
169 real(kind=rp) :: lam_factor = 30.0_rp
170 real(kind=rp) :: wtw, dtw, dtd
171 integer :: i
172
173 associate(w => this%w, w_d => this%w_d, d => this%d, d_d => this%d_d)
174
175 do i = 1, n
176 !TODO: replace with a better way to initialize power method
177 call random_number(rn)
178 d(i) = rn + 10.0_rp
179 end do
180 call device_memcpy(d, d_d, n, host_to_device, sync = .true.)
181
182 call gs_h%op(d, n, gs_op_add, this%gs_event)
183 call blst%apply(d, n)
184
185 !Power method to get lamba max
186 do i = 1, this%power_its
187 call ax%compute(w, d, coef, x%msh, x%Xh)
188 call gs_h%op(w, n, gs_op_add, this%gs_event)
189 call blst%apply(w, n)
190 if (associated(this%schwarz)) then
191 call this%schwarz%compute(this%r, w)
192 call device_copy(w_d, this%r_d, n)
193 else
194 call this%M%solve(this%r, w, n)
195 call device_copy(w_d, this%r_d, n)
196 end if
197
198 wtw = device_glsc3(w_d, coef%mult_d, w_d, n)
199 call device_cmult2(d_d, w_d, 1.0_rp/sqrt(wtw), n)
200 call blst%apply(d, n)
201 end do
202
203 call ax%compute(w, d, coef, x%msh, x%Xh)
204 call gs_h%op(w, n, gs_op_add, this%gs_event)
205 call blst%apply(w, n)
206 if (associated(this%schwarz)) then
207 call this%schwarz%compute(this%r, w)
208 call device_copy(w_d, this%r_d, n)
209 else
210 call this%M%solve(this%r, w, n)
211 call device_copy(w_d, this%r_d, n)
212 end if
213
214 dtw = device_glsc3(d_d, coef%mult_d, w_d, n)
215 dtd = device_glsc3(d_d, coef%mult_d, d_d, n)
216 lam = dtw / dtd
217 b = lam * boost
218 a = lam / lam_factor
219 this%tha = (b+a)/2.0_rp
220 this%dlt = (b-a)/2.0_rp
221
222 this%recompute_eigs = .false.
223 end associate
224 end subroutine cheby_device_power
225
227 function cheby_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) &
228 result(ksp_results)
229 class(cheby_device_t), intent(inout) :: this
230 class(ax_t), intent(in) :: ax
231 type(field_t), intent(inout) :: x
232 integer, intent(in) :: n
233 real(kind=rp), dimension(n), intent(in) :: f
234 type(coef_t), intent(inout) :: coef
235 type(bc_list_t), intent(inout) :: blst
236 type(gs_t), intent(inout) :: gs_h
237 type(ksp_monitor_t) :: ksp_results
238 integer, optional, intent(in) :: niter
239 integer :: iter, max_iter
240 real(kind=rp) :: a, b, rtr, rnorm, norm_fac
241 type(c_ptr) :: f_d
242
243 f_d = device_get_ptr(f)
244
245 if (this%recompute_eigs) then
246 call cheby_device_power(this, ax, x, n, coef, blst, gs_h)
247 end if
248
249 if (present(niter)) then
250 max_iter = niter
251 else
252 max_iter = this%max_iter
253 end if
254 norm_fac = 1.0_rp / sqrt(coef%volume)
255
256 associate( w => this%w, r => this%r, d => this%d, &
257 w_d => this%w_d, r_d => this%r_d, d_d => this%d_d)
258 ! calculate residual
259 call device_copy(r_d, f_d, n)
260 call ax%compute(w, x%x, coef, x%msh, x%Xh)
261 call gs_h%op(w, n, gs_op_add, this%gs_event)
262 call blst%apply(w, n)
263 call device_sub2(r_d, w_d, n)
264
265 rtr = device_glsc3(r_d, coef%mult_d, r_d, n)
266 rnorm = sqrt(rtr) * norm_fac
267 ksp_results%res_start = rnorm
268 ksp_results%res_final = rnorm
269 ksp_results%iter = 0
270
271 ! First iteration
272 call this%M%solve(w, r, n)
273 call device_copy(d_d, w_d, n)
274 a = 2.0_rp / this%tha
275 call device_add2s2(x%x_d, d_d, a, n)! x = x + a*d
276
277 ! Rest of the iterations
278 do iter = 2, max_iter
279 ! calculate residual
280 call device_copy(r_d, f_d, n)
281 call ax%compute(w, x%x, coef, x%msh, x%Xh)
282 call gs_h%op(w, n, gs_op_add, this%gs_event)
283 call blst%apply(w, n)
284 call device_sub2(r_d, w_d, n)
285
286 call this%M%solve(w, r, n)
287
288 if (iter .eq. 2) then
289 b = 0.5_rp * (this%dlt * a)**2
290 else
291 b = (this%dlt * a / 2.0_rp)**2
292 end if
293 a = 1.0_rp/(this%tha - b/a)
294 call device_add2s1(d_d, w_d, b, n)! d = w + b*d
295
296 call device_add2s2(x%x_d, d_d, a, n)! x = x + a*d
297 end do
298
299 ! calculate residual
300 call device_copy(r_d, f_d, n)
301 call ax%compute(w, x%x, coef, x%msh, x%Xh)
302 call gs_h%op(w, n, gs_op_add, this%gs_event)
303 call blst%apply(w, n)
304 call device_sub2(r_d, w_d, n)
305 rtr = device_glsc3(r_d, coef%mult_d, r_d, n)
306 rnorm = sqrt(rtr) * norm_fac
307
308
309 ksp_results%res_final = rnorm
310 ksp_results%iter = iter
311 ksp_results%converged = this%is_converged(iter, rnorm)
312 end associate
313 end function cheby_device_solve
314
316 function cheby_device_impl(this, Ax, x, f, n, coef, blst, gs_h, niter) &
317 result(ksp_results)
318 class(cheby_device_t), intent(inout) :: this
319 class(ax_t), intent(in) :: ax
320 type(field_t), intent(inout) :: x
321 integer, intent(in) :: n
322 real(kind=rp), dimension(n), intent(in) :: f
323 type(coef_t), intent(inout) :: coef
324 type(bc_list_t), intent(inout) :: blst
325 type(gs_t), intent(inout) :: gs_h
326 type(ksp_monitor_t) :: ksp_results
327 integer, optional, intent(in) :: niter
328 integer :: iter, max_iter
329 real(kind=rp) :: a, b, rtr, rnorm, norm_fac
330 real(kind=rp) :: rhok, rhokp1, sig1, tmp1, tmp2
331 type(c_ptr) :: f_d
332
333 f_d = device_get_ptr(f)
334
335 if (this%recompute_eigs) then
336 call cheby_device_power(this, ax, x, n, coef, blst, gs_h)
337 end if
338
339 if (present(niter)) then
340 max_iter = niter
341 else
342 max_iter = this%max_iter
343 end if
344 norm_fac = 1.0_rp / sqrt(coef%volume)
345
346 associate( w => this%w, r => this%r, d => this%d, &
347 w_d => this%w_d, r_d => this%r_d, d_d => this%d_d)
348 ! calculate residual
349 if (.not.this%zero_initial_guess) then
350 call ax%compute(w, x%x, coef, x%msh, x%Xh)
351 call gs_h%op(w, n, gs_op_add, this%gs_event)
352 call blst%apply(w, n)
353 call device_sub3(r_d, f_d, w_d, n)
354 else
355 call device_copy(r_d, f_d, n)
356 this%zero_initial_guess = .false.
357 end if
358
359 ! First iteration
360 if (associated(this%schwarz)) then
361 call this%schwarz%compute(d, r)
362 else
363 call this%M%solve(d, r, n)
364 end if
365 call device_cmult( d_d, (1.0_rp / this%tha), n)
366 call device_add2( x%x_d, d_d, n)
367
368 sig1 = this%tha / this%dlt
369 rhok = 1.0_rp / sig1
370
371 ! Rest of the iterations
372 do iter = 2, max_iter
373 rhokp1 = 1.0_rp / (2.0_rp * sig1 - rhok)
374 tmp1 = rhokp1 * rhok
375 tmp2 = 2.0_rp * rhokp1 / this%dlt
376 rhok = rhokp1
377 ! calculate residual
378 call ax%compute(w, x%x, coef, x%msh, x%Xh)
379 call gs_h%op(w, n, gs_op_add, this%gs_event)
380 call blst%apply(w, n)
381 call device_sub3(r_d, f_d, w_d, n)
382
383 if (associated(this%schwarz)) then
384 call this%schwarz%compute(w, r)
385 else
386 call this%M%solve(w, r, n)
387 end if
388 call device_cmult( d_d, tmp1, n)
389 call device_add2s2( d_d, w_d, tmp2, n)
390 call device_add2( x%x_d, d_d, n)
391 end do
392
393 end associate
394 end function cheby_device_impl
395
397 function cheby_device_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
398 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
399 class(cheby_device_t), intent(inout) :: this
400 class(ax_t), intent(in) :: ax
401 type(field_t), intent(inout) :: x
402 type(field_t), intent(inout) :: y
403 type(field_t), intent(inout) :: z
404 integer, intent(in) :: n
405 real(kind=rp), dimension(n), intent(in) :: fx
406 real(kind=rp), dimension(n), intent(in) :: fy
407 real(kind=rp), dimension(n), intent(in) :: fz
408 type(coef_t), intent(inout) :: coef
409 type(bc_list_t), intent(inout) :: blstx
410 type(bc_list_t), intent(inout) :: blsty
411 type(bc_list_t), intent(inout) :: blstz
412 type(gs_t), intent(inout) :: gs_h
413 type(ksp_monitor_t), dimension(3) :: ksp_results
414 integer, optional, intent(in) :: niter
415
416 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
417 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
418 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
419
420 end function cheby_device_solve_coupled
421
422end module cheby_device
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Map a Fortran array to a device (allocate and associate)
Definition device.F90:72
Copy data between host and device (or device and device)
Definition device.F90:66
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Chebyshev preconditioner.
subroutine cheby_device_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a standard solver.
type(ksp_monitor_t) function, dimension(3) cheby_device_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard Cheby_Deviceshev coupled solve.
type(ksp_monitor_t) function cheby_device_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
A chebyshev preconditioner.
subroutine cheby_device_free(this)
subroutine cheby_device_power(this, ax, x, n, coef, blst, gs_h)
type(ksp_monitor_t) function cheby_device_impl(this, ax, x, f, n, coef, blst, gs_h, niter)
A chebyshev preconditioner.
Coefficients.
Definition coef.f90:34
subroutine, public device_add2s1(a_d, b_d, c1, n, strm)
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction .
subroutine, public device_add2s2(a_d, b_d, c1, n, strm)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_sub2(a_d, b_d, n, strm)
Vector substraction .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
subroutine, public device_cmult2(a_d, b_d, c, n, strm)
Multiplication by constant c .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:214
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1274
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1244
Defines a field.
Definition field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
Defines a mesh.
Definition mesh.f90:34
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Krylov preconditioner.
Definition precon.f90:34
Overlapping schwarz solves.
Definition schwarz.f90:61
Defines a function space.
Definition space.f90:34
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
Defines a Chebyshev preconditioner.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:73
Defines a canonical Krylov preconditioner.
Definition precon.f90:40
The function space for the SEM solution fields.
Definition space.f90:62