Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
cheby_device.F90
Go to the documentation of this file.
1! Copyright (c) 2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use krylov, only : ksp_t, ksp_monitor_t
36 use precon, only : pc_t
37 use ax_product, only : ax_t
38 use num_types, only: rp
39 use field, only : field_t
40 use coefs, only : coef_t
41 use mesh, only : mesh_t
42 use space, only : space_t
43 use gather_scatter, only : gs_t, gs_op_add
44 use bc_list, only : bc_list_t
47 use device
48 implicit none
49 private
50
52 type, public, extends(ksp_t) :: cheby_device_t
53 real(kind=rp), allocatable :: d(:)
54 real(kind=rp), allocatable :: w(:)
55 real(kind=rp), allocatable :: r(:)
56 type(c_ptr) :: d_d = c_null_ptr
57 type(c_ptr) :: w_d = c_null_ptr
58 type(c_ptr) :: r_d = c_null_ptr
59 type(c_ptr) :: gs_event = c_null_ptr
60 real(kind=rp) :: tha, dlt
61 integer :: power_its = 150
62 logical :: recompute_eigs = .true.
63 contains
64 procedure, pass(this) :: init => cheby_device_init
65 procedure, pass(this) :: free => cheby_device_free
66 procedure, pass(this) :: solve => cheby_device_solve
67 procedure, pass(this) :: solve_coupled => cheby_device_solve_coupled
68 end type cheby_device_t
69
70contains
71
73 subroutine cheby_device_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
74 class(cheby_device_t), intent(inout), target :: this
75 integer, intent(in) :: max_iter
76 class(pc_t), optional, intent(in), target :: M
77 integer, intent(in) :: n
78 real(kind=rp), optional, intent(in) :: rel_tol
79 real(kind=rp), optional, intent(in) :: abs_tol
80 logical, optional, intent(in) :: monitor
81
82 call this%free()
83 allocate(this%d(n))
84 allocate(this%w(n))
85 allocate(this%r(n))
86
87 call device_map(this%d, this%d_d, n)
88 call device_map(this%w, this%w_d, n)
89 call device_map(this%r, this%r_d, n)
90
91 if (present(m)) then
92 this%M => m
93 end if
94
95 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
96 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
97 else if (present(rel_tol) .and. present(abs_tol)) then
98 call this%ksp_init(max_iter, rel_tol, abs_tol)
99 else if (present(monitor) .and. present(abs_tol)) then
100 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
101 else if (present(rel_tol) .and. present(monitor)) then
102 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
103 else if (present(rel_tol)) then
104 call this%ksp_init(max_iter, rel_tol = rel_tol)
105 else if (present(abs_tol)) then
106 call this%ksp_init(max_iter, abs_tol = abs_tol)
107 else if (present(monitor)) then
108 call this%ksp_init(max_iter, monitor = monitor)
109 else
110 call this%ksp_init(max_iter)
111 end if
112
113 call device_event_create(this%gs_event, 2)
114
115 end subroutine cheby_device_init
116
117 subroutine cheby_device_free(this)
118 class(cheby_device_t), intent(inout) :: this
119
120 call this%ksp_free()
121
122 if (allocated(this%d)) then
123 deallocate(this%d)
124 end if
125
126 if (allocated(this%w)) then
127 deallocate(this%w)
128 end if
129
130 if (allocated(this%r)) then
131 deallocate(this%r)
132 end if
133
134 nullify(this%M)
135
136 if (c_associated(this%d_d)) then
137 call device_free(this%d_d)
138 end if
139
140 if (c_associated(this%w_d)) then
141 call device_free(this%w_d)
142 end if
143
144 if (c_associated(this%r_d)) then
145 call device_free(this%r_d)
146 end if
147
148 if (c_associated(this%gs_event)) then
149 call device_event_destroy(this%gs_event)
150 end if
151
152 end subroutine cheby_device_free
153
154 subroutine cheby_device_power(this, Ax, x, n, coef, blst, gs_h)
155 class(cheby_device_t), intent(inout) :: this
156 class(ax_t), intent(in) :: Ax
157 type(field_t), intent(inout) :: x
158 integer, intent(in) :: n
159 type(coef_t), intent(inout) :: coef
160 type(bc_list_t), intent(inout) :: blst
161 type(gs_t), intent(inout) :: gs_h
162 real(kind=rp) :: lam, b, a, rn
163 real(kind=rp) :: boost = 1.2_rp
164 real(kind=rp) :: lam_factor = 30.0_rp
165 real(kind=rp) :: wtw, dtw, dtd
166 integer :: i
167
168 associate(w => this%w, w_d => this%w_d, d => this%d, d_d => this%d_d)
169
170 do i = 1, n
171 !TODO: replace with a better way to initialize power method
172 call random_number(rn)
173 d(i) = rn + 10.0_rp
174 end do
175 call device_memcpy(d, d_d, n, host_to_device, sync = .true.)
176
177 call gs_h%op(d, n, gs_op_add, this%gs_event)
178 call blst%apply(d, n)
179
180 !Power method to get lamba max
181 do i = 1, this%power_its
182 call ax%compute(w, d, coef, x%msh, x%Xh)
183 call gs_h%op(w, n, gs_op_add, this%gs_event)
184 call blst%apply(w, n)
185
186 wtw = device_glsc3(w_d, coef%mult_d, w_d, n)
187 call device_cmult2(d_d, w_d, 1.0_rp/sqrt(wtw), n)
188 call blst%apply(d, n)
189 end do
190
191 call ax%compute(w, d, coef, x%msh, x%Xh)
192 call gs_h%op(w, n, gs_op_add, this%gs_event)
193 call blst%apply(w, n)
194
195 dtw = device_glsc3(d_d, coef%mult_d, w_d, n)
196 dtd = device_glsc3(d_d, coef%mult_d, d_d, n)
197 lam = dtw / dtd
198 b = lam * boost
199 a = lam / lam_factor
200 this%tha = (b+a)/2.0_rp
201 this%dlt = (b-a)/2.0_rp
202
203 this%recompute_eigs = .false.
204 end associate
205 end subroutine cheby_device_power
206
208 function cheby_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) &
209 result(ksp_results)
210 class(cheby_device_t), intent(inout) :: this
211 class(ax_t), intent(in) :: ax
212 type(field_t), intent(inout) :: x
213 integer, intent(in) :: n
214 real(kind=rp), dimension(n), intent(in) :: f
215 type(coef_t), intent(inout) :: coef
216 type(bc_list_t), intent(inout) :: blst
217 type(gs_t), intent(inout) :: gs_h
218 type(ksp_monitor_t) :: ksp_results
219 integer, optional, intent(in) :: niter
220 integer :: iter, max_iter
221 real(kind=rp) :: a, b, rtr, rnorm, norm_fac
222 type(c_ptr) :: f_d
223
224 f_d = device_get_ptr(f)
225
226 if (this%recompute_eigs) then
227 call cheby_device_power(this, ax, x, n, coef, blst, gs_h)
228 end if
229
230 if (present(niter)) then
231 max_iter = niter
232 else
233 max_iter = this%max_iter
234 end if
235 norm_fac = 1.0_rp / sqrt(coef%volume)
236
237 associate( w => this%w, r => this%r, d => this%d, &
238 w_d => this%w_d, r_d => this%r_d, d_d => this%d_d)
239 ! calculate residual
240 call device_copy(r_d, f_d, n)
241 call ax%compute(w, x%x, coef, x%msh, x%Xh)
242 call gs_h%op(w, n, gs_op_add, this%gs_event)
243 call blst%apply(w, n)
244 call device_sub2(r_d, w_d, n)
245
246 rtr = device_glsc3(r_d, coef%mult_d, r_d, n)
247 rnorm = sqrt(rtr) * norm_fac
248 ksp_results%res_start = rnorm
249 ksp_results%res_final = rnorm
250 ksp_results%iter = 0
251
252 ! First iteration
253 call this%M%solve(w, r, n)
254 call device_copy(d_d, w_d, n)
255 a = 2.0_rp / this%tha
256 call device_add2s2(x%x_d, d_d, a, n)! x = x + a*d
257
258 ! Rest of the iterations
259 do iter = 2, max_iter
260 ! calculate residual
261 call device_copy(r_d, f_d, n)
262 call ax%compute(w, x%x, coef, x%msh, x%Xh)
263 call gs_h%op(w, n, gs_op_add, this%gs_event)
264 call blst%apply(w, n)
265 call device_sub2(r_d, w_d, n)
266
267 call this%M%solve(w, r, n)
268
269 if (iter .eq. 2) then
270 b = 0.5_rp * (this%dlt * a)**2
271 else
272 b = (this%dlt * a / 2.0_rp)**2
273 end if
274 a = 1.0_rp/(this%tha - b/a)
275 call device_add2s1(d_d, w_d, b, n)! d = w + b*d
276
277 call device_add2s2(x%x_d, d_d, a, n)! x = x + a*d
278 end do
279
280 ! calculate residual
281 call device_copy(r_d, f_d, n)
282 call ax%compute(w, x%x, coef, x%msh, x%Xh)
283 call gs_h%op(w, n, gs_op_add, this%gs_event)
284 call blst%apply(w, n)
285 call device_sub2(r_d, w_d, n)
286 rtr = device_glsc3(r_d, coef%mult_d, r_d, n)
287 rnorm = sqrt(rtr) * norm_fac
288
289
290 ksp_results%res_final = rnorm
291 ksp_results%iter = iter
292 ksp_results%converged = this%is_converged(iter, rnorm)
293 end associate
294 end function cheby_device_solve
295
297 function cheby_device_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
298 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
299 class(cheby_device_t), intent(inout) :: this
300 class(ax_t), intent(in) :: ax
301 type(field_t), intent(inout) :: x
302 type(field_t), intent(inout) :: y
303 type(field_t), intent(inout) :: z
304 integer, intent(in) :: n
305 real(kind=rp), dimension(n), intent(in) :: fx
306 real(kind=rp), dimension(n), intent(in) :: fy
307 real(kind=rp), dimension(n), intent(in) :: fz
308 type(coef_t), intent(inout) :: coef
309 type(bc_list_t), intent(inout) :: blstx
310 type(bc_list_t), intent(inout) :: blsty
311 type(bc_list_t), intent(inout) :: blstz
312 type(gs_t), intent(inout) :: gs_h
313 type(ksp_monitor_t), dimension(3) :: ksp_results
314 integer, optional, intent(in) :: niter
315
316 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
317 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
318 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
319
320 end function cheby_device_solve_coupled
321
322end module cheby_device
323
324
Map a Fortran array to a device (allocate and associate)
Definition device.F90:57
Copy data between host and device (or device and device)
Definition device.F90:51
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Chebyshev preconditioner.
subroutine cheby_device_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a standard solver.
type(ksp_monitor_t) function, dimension(3) cheby_device_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard Cheby_Deviceshev coupled solve.
type(ksp_monitor_t) function cheby_device_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
A chebyshev preconditioner.
subroutine cheby_device_free(this)
subroutine cheby_device_power(this, ax, x, n, coef, blst, gs_h)
Coefficients.
Definition coef.f90:34
subroutine, public device_add2s1(a_d, b_d, c1, n)
subroutine, public device_add2s2(a_d, b_d, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_cmult2(a_d, b_d, c, n)
Multiplication by constant c .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
subroutine, public device_sub2(a_d, b_d, n)
Vector substraction .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:185
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1194
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1164
Defines a field.
Definition field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
Defines a mesh.
Definition mesh.f90:34
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Krylov preconditioner.
Definition precon.f90:34
Defines a function space.
Definition space.f90:34
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:46
Defines a Chebyshev preconditioner.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:68
Defines a canonical Krylov preconditioner.
Definition precon.f90:40
The function space for the SEM solution fields.
Definition space.f90:62