Loading [MathJax]/extensions/tex2jax.js
Neko 0.9.99
A portable framework for high-order spectral element flow simulations
All Classes Namespaces Files Functions Variables Typedefs Enumerator Macros Pages
cheby_device.F90
Go to the documentation of this file.
1! Copyright (c) 2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use krylov, only : ksp_t, ksp_monitor_t
36 use precon, only : pc_t
37 use ax_product, only : ax_t
38 use num_types, only: rp
39 use field, only : field_t
40 use coefs, only : coef_t
41 use mesh, only : mesh_t
42 use space, only : space_t
43 use gather_scatter, only : gs_t, gs_op_add
44 use bc_list, only : bc_list_t
47 use device
48 use, intrinsic :: iso_c_binding, only : c_ptr, c_null_ptr, c_associated
49 implicit none
50 private
51
53 type, public, extends(ksp_t) :: cheby_device_t
54 real(kind=rp), allocatable :: d(:)
55 real(kind=rp), allocatable :: w(:)
56 real(kind=rp), allocatable :: r(:)
57 type(c_ptr) :: d_d = c_null_ptr
58 type(c_ptr) :: w_d = c_null_ptr
59 type(c_ptr) :: r_d = c_null_ptr
60 type(c_ptr) :: gs_event = c_null_ptr
61 real(kind=rp) :: tha, dlt
62 integer :: power_its = 150
63 logical :: recompute_eigs = .true.
64 contains
65 procedure, pass(this) :: init => cheby_device_init
66 procedure, pass(this) :: free => cheby_device_free
67 procedure, pass(this) :: solve => cheby_device_solve
68 procedure, pass(this) :: solve_coupled => cheby_device_solve_coupled
69 end type cheby_device_t
70
71contains
72
74 subroutine cheby_device_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
75 class(cheby_device_t), intent(inout), target :: this
76 integer, intent(in) :: max_iter
77 class(pc_t), optional, intent(in), target :: M
78 integer, intent(in) :: n
79 real(kind=rp), optional, intent(in) :: rel_tol
80 real(kind=rp), optional, intent(in) :: abs_tol
81 logical, optional, intent(in) :: monitor
82
83 call this%free()
84 allocate(this%d(n))
85 allocate(this%w(n))
86 allocate(this%r(n))
87
88 call device_map(this%d, this%d_d, n)
89 call device_map(this%w, this%w_d, n)
90 call device_map(this%r, this%r_d, n)
91
92 if (present(m)) then
93 this%M => m
94 end if
95
96 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
97 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
98 else if (present(rel_tol) .and. present(abs_tol)) then
99 call this%ksp_init(max_iter, rel_tol, abs_tol)
100 else if (present(monitor) .and. present(abs_tol)) then
101 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
102 else if (present(rel_tol) .and. present(monitor)) then
103 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
104 else if (present(rel_tol)) then
105 call this%ksp_init(max_iter, rel_tol = rel_tol)
106 else if (present(abs_tol)) then
107 call this%ksp_init(max_iter, abs_tol = abs_tol)
108 else if (present(monitor)) then
109 call this%ksp_init(max_iter, monitor = monitor)
110 else
111 call this%ksp_init(max_iter)
112 end if
113
114 call device_event_create(this%gs_event, 2)
115
116 end subroutine cheby_device_init
117
118 subroutine cheby_device_free(this)
119 class(cheby_device_t), intent(inout) :: this
120
121 call this%ksp_free()
122
123 if (allocated(this%d)) then
124 deallocate(this%d)
125 end if
126
127 if (allocated(this%w)) then
128 deallocate(this%w)
129 end if
130
131 if (allocated(this%r)) then
132 deallocate(this%r)
133 end if
134
135 nullify(this%M)
136
137 if (c_associated(this%d_d)) then
138 call device_free(this%d_d)
139 end if
140
141 if (c_associated(this%w_d)) then
142 call device_free(this%w_d)
143 end if
144
145 if (c_associated(this%r_d)) then
146 call device_free(this%r_d)
147 end if
148
149 if (c_associated(this%gs_event)) then
150 call device_event_destroy(this%gs_event)
151 end if
152
153 end subroutine cheby_device_free
154
155 subroutine cheby_device_power(this, Ax, x, n, coef, blst, gs_h)
156 class(cheby_device_t), intent(inout) :: this
157 class(ax_t), intent(in) :: Ax
158 type(field_t), intent(inout) :: x
159 integer, intent(in) :: n
160 type(coef_t), intent(inout) :: coef
161 type(bc_list_t), intent(inout) :: blst
162 type(gs_t), intent(inout) :: gs_h
163 real(kind=rp) :: lam, b, a, rn
164 real(kind=rp) :: boost = 1.2_rp
165 real(kind=rp) :: lam_factor = 30.0_rp
166 real(kind=rp) :: wtw, dtw, dtd
167 integer :: i
168
169 associate(w => this%w, w_d => this%w_d, d => this%d, d_d => this%d_d)
170
171 do i = 1, n
172 !TODO: replace with a better way to initialize power method
173 call random_number(rn)
174 d(i) = rn + 10.0_rp
175 end do
176 call device_memcpy(d, d_d, n, host_to_device, sync = .true.)
177
178 call gs_h%op(d, n, gs_op_add, this%gs_event)
179 call blst%apply(d, n)
180
181 !Power method to get lamba max
182 do i = 1, this%power_its
183 call ax%compute(w, d, coef, x%msh, x%Xh)
184 call gs_h%op(w, n, gs_op_add, this%gs_event)
185 call blst%apply(w, n)
186
187 wtw = device_glsc3(w_d, coef%mult_d, w_d, n)
188 call device_cmult2(d_d, w_d, 1.0_rp/sqrt(wtw), n)
189 call blst%apply(d, n)
190 end do
191
192 call ax%compute(w, d, coef, x%msh, x%Xh)
193 call gs_h%op(w, n, gs_op_add, this%gs_event)
194 call blst%apply(w, n)
195
196 dtw = device_glsc3(d_d, coef%mult_d, w_d, n)
197 dtd = device_glsc3(d_d, coef%mult_d, d_d, n)
198 lam = dtw / dtd
199 b = lam * boost
200 a = lam / lam_factor
201 this%tha = (b+a)/2.0_rp
202 this%dlt = (b-a)/2.0_rp
203
204 this%recompute_eigs = .false.
205 end associate
206 end subroutine cheby_device_power
207
209 function cheby_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) &
210 result(ksp_results)
211 class(cheby_device_t), intent(inout) :: this
212 class(ax_t), intent(in) :: ax
213 type(field_t), intent(inout) :: x
214 integer, intent(in) :: n
215 real(kind=rp), dimension(n), intent(in) :: f
216 type(coef_t), intent(inout) :: coef
217 type(bc_list_t), intent(inout) :: blst
218 type(gs_t), intent(inout) :: gs_h
219 type(ksp_monitor_t) :: ksp_results
220 integer, optional, intent(in) :: niter
221 integer :: iter, max_iter
222 real(kind=rp) :: a, b, rtr, rnorm, norm_fac
223 type(c_ptr) :: f_d
224
225 f_d = device_get_ptr(f)
226
227 if (this%recompute_eigs) then
228 call cheby_device_power(this, ax, x, n, coef, blst, gs_h)
229 end if
230
231 if (present(niter)) then
232 max_iter = niter
233 else
234 max_iter = this%max_iter
235 end if
236 norm_fac = 1.0_rp / sqrt(coef%volume)
237
238 associate( w => this%w, r => this%r, d => this%d, &
239 w_d => this%w_d, r_d => this%r_d, d_d => this%d_d)
240 ! calculate residual
241 call device_copy(r_d, f_d, n)
242 call ax%compute(w, x%x, coef, x%msh, x%Xh)
243 call gs_h%op(w, n, gs_op_add, this%gs_event)
244 call blst%apply(w, n)
245 call device_sub2(r_d, w_d, n)
246
247 rtr = device_glsc3(r_d, coef%mult_d, r_d, n)
248 rnorm = sqrt(rtr) * norm_fac
249 ksp_results%res_start = rnorm
250 ksp_results%res_final = rnorm
251 ksp_results%iter = 0
252
253 ! First iteration
254 call this%M%solve(w, r, n)
255 call device_copy(d_d, w_d, n)
256 a = 2.0_rp / this%tha
257 call device_add2s2(x%x_d, d_d, a, n)! x = x + a*d
258
259 ! Rest of the iterations
260 do iter = 2, max_iter
261 ! calculate residual
262 call device_copy(r_d, f_d, n)
263 call ax%compute(w, x%x, coef, x%msh, x%Xh)
264 call gs_h%op(w, n, gs_op_add, this%gs_event)
265 call blst%apply(w, n)
266 call device_sub2(r_d, w_d, n)
267
268 call this%M%solve(w, r, n)
269
270 if (iter .eq. 2) then
271 b = 0.5_rp * (this%dlt * a)**2
272 else
273 b = (this%dlt * a / 2.0_rp)**2
274 end if
275 a = 1.0_rp/(this%tha - b/a)
276 call device_add2s1(d_d, w_d, b, n)! d = w + b*d
277
278 call device_add2s2(x%x_d, d_d, a, n)! x = x + a*d
279 end do
280
281 ! calculate residual
282 call device_copy(r_d, f_d, n)
283 call ax%compute(w, x%x, coef, x%msh, x%Xh)
284 call gs_h%op(w, n, gs_op_add, this%gs_event)
285 call blst%apply(w, n)
286 call device_sub2(r_d, w_d, n)
287 rtr = device_glsc3(r_d, coef%mult_d, r_d, n)
288 rnorm = sqrt(rtr) * norm_fac
289
290
291 ksp_results%res_final = rnorm
292 ksp_results%iter = iter
293 ksp_results%converged = this%is_converged(iter, rnorm)
294 end associate
295 end function cheby_device_solve
296
298 function cheby_device_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
299 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
300 class(cheby_device_t), intent(inout) :: this
301 class(ax_t), intent(in) :: ax
302 type(field_t), intent(inout) :: x
303 type(field_t), intent(inout) :: y
304 type(field_t), intent(inout) :: z
305 integer, intent(in) :: n
306 real(kind=rp), dimension(n), intent(in) :: fx
307 real(kind=rp), dimension(n), intent(in) :: fy
308 real(kind=rp), dimension(n), intent(in) :: fz
309 type(coef_t), intent(inout) :: coef
310 type(bc_list_t), intent(inout) :: blstx
311 type(bc_list_t), intent(inout) :: blsty
312 type(bc_list_t), intent(inout) :: blstz
313 type(gs_t), intent(inout) :: gs_h
314 type(ksp_monitor_t), dimension(3) :: ksp_results
315 integer, optional, intent(in) :: niter
316
317 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
318 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
319 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
320
321 end function cheby_device_solve_coupled
322
323end module cheby_device
Map a Fortran array to a device (allocate and associate)
Definition device.F90:71
Copy data between host and device (or device and device)
Definition device.F90:65
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Chebyshev preconditioner.
subroutine cheby_device_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a standard solver.
type(ksp_monitor_t) function, dimension(3) cheby_device_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard Cheby_Deviceshev coupled solve.
type(ksp_monitor_t) function cheby_device_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
A chebyshev preconditioner.
subroutine cheby_device_free(this)
subroutine cheby_device_power(this, ax, x, n, coef, blst, gs_h)
Coefficients.
Definition coef.f90:34
subroutine, public device_add2s1(a_d, b_d, c1, n)
subroutine, public device_add2s2(a_d, b_d, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_cmult2(a_d, b_d, c, n)
Multiplication by constant c .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
subroutine, public device_sub2(a_d, b_d, n)
Vector substraction .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:46
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:200
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1209
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1179
Defines a field.
Definition field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
Defines a mesh.
Definition mesh.f90:34
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Krylov preconditioner.
Definition precon.f90:34
Defines a function space.
Definition space.f90:34
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:47
Defines a Chebyshev preconditioner.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:68
Defines a canonical Krylov preconditioner.
Definition precon.f90:40
The function space for the SEM solution fields.
Definition space.f90:62