Neko  0.8.99
A portable framework for high-order spectral element flow simulations
cheby_device.F90
Go to the documentation of this file.
1 ! Copyright (c) 2024, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
35  use krylov, only : ksp_t, ksp_monitor_t
36  use precon, only : pc_t
37  use ax_product, only : ax_t
38  use num_types, only: rp
39  use field, only : field_t
40  use coefs, only : coef_t
41  use mesh, only : mesh_t
42  use space, only : space_t
43  use gather_scatter, only : gs_t, gs_op_add
44  use bc, only : bc_list_t, bc_list_apply
45  use device_math, only : device_cmult2, device_sub2, &
47  use device
48  implicit none
49  private
50 
52  type, public, extends(ksp_t) :: cheby_device_t
53  real(kind=rp), allocatable :: d(:)
54  real(kind=rp), allocatable :: w(:)
55  real(kind=rp), allocatable :: r(:)
56  type(c_ptr) :: d_d = c_null_ptr
57  type(c_ptr) :: w_d = c_null_ptr
58  type(c_ptr) :: r_d = c_null_ptr
59  type(c_ptr) :: gs_event = c_null_ptr
60  real(kind=rp) :: tha, dlt
61  integer :: power_its = 150
62  logical :: recompute_eigs = .true.
63  contains
64  procedure, pass(this) :: init => cheby_device_init
65  procedure, pass(this) :: free => cheby_device_free
66  procedure, pass(this) :: solve => cheby_device_solve
67  procedure, pass(this) :: solve_coupled => cheby_device_solve_coupled
68  end type cheby_device_t
69 
70 contains
71 
73  subroutine cheby_device_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
74  class(cheby_device_t), intent(inout), target :: this
75  integer, intent(in) :: max_iter
76  class(pc_t), optional, intent(inout), target :: M
77  integer, intent(in) :: n
78  real(kind=rp), optional, intent(inout) :: rel_tol
79  real(kind=rp), optional, intent(inout) :: abs_tol
80  logical, optional, intent(in) :: monitor
81 
82  call this%free()
83  allocate(this%d(n))
84  allocate(this%w(n))
85  allocate(this%r(n))
86 
87  call device_map(this%d, this%d_d, n)
88  call device_map(this%w, this%w_d, n)
89  call device_map(this%r, this%r_d, n)
90 
91  if (present(m)) then
92  this%M => m
93  end if
94 
95  if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
96  call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
97  else if (present(rel_tol) .and. present(abs_tol)) then
98  call this%ksp_init(max_iter, rel_tol, abs_tol)
99  else if (present(monitor) .and. present(abs_tol)) then
100  call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
101  else if (present(rel_tol) .and. present(monitor)) then
102  call this%ksp_init(max_iter, rel_tol, monitor = monitor)
103  else if (present(rel_tol)) then
104  call this%ksp_init(max_iter, rel_tol = rel_tol)
105  else if (present(abs_tol)) then
106  call this%ksp_init(max_iter, abs_tol = abs_tol)
107  else if (present(monitor)) then
108  call this%ksp_init(max_iter, monitor = monitor)
109  else
110  call this%ksp_init(max_iter)
111  end if
112 
113  call device_event_create(this%gs_event, 2)
114 
115  end subroutine cheby_device_init
116 
117  subroutine cheby_device_free(this)
118  class(cheby_device_t), intent(inout) :: this
119 
120  call this%ksp_free()
121 
122  if (allocated(this%d)) then
123  deallocate(this%d)
124  end if
125 
126  if (allocated(this%w)) then
127  deallocate(this%w)
128  end if
129 
130  if (allocated(this%r)) then
131  deallocate(this%r)
132  end if
133 
134  nullify(this%M)
135 
136  if (c_associated(this%d_d)) then
137  call device_free(this%d_d)
138  end if
139 
140  if (c_associated(this%w_d)) then
141  call device_free(this%w_d)
142  end if
143 
144  if (c_associated(this%r_d)) then
145  call device_free(this%r_d)
146  end if
147 
148  if (c_associated(this%gs_event)) then
149  call device_event_destroy(this%gs_event)
150  end if
151 
152  end subroutine cheby_device_free
153 
154  subroutine cheby_device_power(this, Ax, x, n, coef, blst, gs_h)
155  class(cheby_device_t), intent(inout) :: this
156  class(ax_t), intent(inout) :: Ax
157  type(field_t), intent(inout) :: x
158  integer, intent(in) :: n
159  type(coef_t), intent(inout) :: coef
160  type(bc_list_t), intent(inout) :: blst
161  type(gs_t), intent(inout) :: gs_h
162  real(kind=rp) :: lam, b, a, rn
163  real(kind=rp) :: boost = 1.2_rp
164  real(kind=rp) :: lam_factor = 30.0_rp
165  real(kind=rp) :: wtw, dtw, dtd
166  integer :: i
167 
168  associate(w => this%w, w_d => this%w_d, d => this%d, d_d => this%d_d)
169 
170  do i = 1, n
171  !TODO: replace with a better way to initialize power method
172  call random_number(rn)
173  d(i) = rn + 10.0_rp
174  end do
175  call device_memcpy(d, d_d, n, host_to_device, sync = .true.)
176 
177  call gs_h%op(d, n, gs_op_add, this%gs_event)
178  call bc_list_apply(blst, d, n)
179 
180  !Power method to get lamba max
181  do i = 1, this%power_its
182  call ax%compute(w, d, coef, x%msh, x%Xh)
183  call gs_h%op(w, n, gs_op_add, this%gs_event)
184  call bc_list_apply(blst, w, n)
185 
186  wtw = device_glsc3(w_d, coef%mult_d, w_d, n)
187  call device_cmult2(d_d, w_d, 1.0_rp/sqrt(wtw), n)
188  call bc_list_apply(blst, d, n)
189  end do
190 
191  call ax%compute(w, d, coef, x%msh, x%Xh)
192  call gs_h%op(w, n, gs_op_add, this%gs_event)
193  call bc_list_apply(blst, w, n)
194 
195  dtw = device_glsc3(d_d, coef%mult_d, w_d, n)
196  dtd = device_glsc3(d_d, coef%mult_d, d_d, n)
197  lam = dtw / dtd
198  b = lam * boost
199  a = lam / lam_factor
200  this%tha = (b+a)/2.0_rp
201  this%dlt = (b-a)/2.0_rp
202 
203  this%recompute_eigs = .false.
204  end associate
205  end subroutine cheby_device_power
206 
208  function cheby_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) &
209  result(ksp_results)
210  class(cheby_device_t), intent(inout) :: this
211  class(ax_t), intent(inout) :: ax
212  type(field_t), intent(inout) :: x
213  integer, intent(in) :: n
214  real(kind=rp), dimension(n), intent(inout) :: f
215  type(coef_t), intent(inout) :: coef
216  type(bc_list_t), intent(inout) :: blst
217  type(gs_t), intent(inout) :: gs_h
218  type(ksp_monitor_t) :: ksp_results
219  integer, optional, intent(in) :: niter
220  integer :: iter, max_iter
221  real(kind=rp) :: a, b, rtr, rnorm, norm_fac
222  type(c_ptr) :: f_d
223 
224  f_d = device_get_ptr(f)
225 
226  if (this%recompute_eigs) then
227  call cheby_device_power(this, ax, x, n, coef, blst, gs_h)
228  end if
229 
230  if (present(niter)) then
231  max_iter = niter
232  else
233  max_iter = this%max_iter
234  end if
235  norm_fac = 1.0_rp / sqrt(coef%volume)
236 
237  associate( w => this%w, r => this%r, d => this%d, &
238  w_d => this%w_d, r_d => this%r_d, d_d => this%d_d)
239  ! calculate residual
240  call device_copy(r_d, f_d, n)
241  call ax%compute(w, x%x, coef, x%msh, x%Xh)
242  call gs_h%op(w, n, gs_op_add, this%gs_event)
243  call bc_list_apply(blst, w, n)
244  call device_sub2(r_d, w_d, n)
245 
246  rtr = device_glsc3(r_d, coef%mult_d, r_d, n)
247  rnorm = sqrt(rtr) * norm_fac
248  ksp_results%res_start = rnorm
249  ksp_results%res_final = rnorm
250  ksp_results%iter = 0
251 
252  ! First iteration
253  call this%M%solve(w, r, n)
254  call device_copy(d_d, w_d, n)
255  a = 2.0_rp / this%tha
256  call device_add2s2(x%x_d, d_d, a, n)! x = x + a*d
257 
258  ! Rest of the iterations
259  do iter = 2, max_iter
260  ! calculate residual
261  call device_copy(r_d, f_d, n)
262  call ax%compute(w, x%x, coef, x%msh, x%Xh)
263  call gs_h%op(w, n, gs_op_add, this%gs_event)
264  call bc_list_apply(blst, w, n)
265  call device_sub2(r_d, w_d, n)
266 
267  call this%M%solve(w, r, n)
268 
269  b = (this%dlt * a / 2.0_rp)**2
270  a = 1.0_rp / (this%tha - b)
271  call device_add2s1(d_d, w_d, b, n)! d = w + b*d
272 
273  call device_add2s2(x%x_d, d_d, a, n)! x = x + a*d
274  end do
275 
276  ! calculate residual
277  call device_copy(r_d, f_d, n)
278  call ax%compute(w, x%x, coef, x%msh, x%Xh)
279  call gs_h%op(w, n, gs_op_add, this%gs_event)
280  call bc_list_apply(blst, w, n)
281  call device_sub2(r_d, w_d, n)
282  rtr = device_glsc3(r_d, coef%mult_d, r_d, n)
283  rnorm = sqrt(rtr) * norm_fac
284  ksp_results%res_final = rnorm
285  ksp_results%iter = iter
286  end associate
287  end function cheby_device_solve
288 
290  function cheby_device_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
291  n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
292  class(cheby_device_t), intent(inout) :: this
293  class(ax_t), intent(inout) :: ax
294  type(field_t), intent(inout) :: x
295  type(field_t), intent(inout) :: y
296  type(field_t), intent(inout) :: z
297  integer, intent(in) :: n
298  real(kind=rp), dimension(n), intent(inout) :: fx
299  real(kind=rp), dimension(n), intent(inout) :: fy
300  real(kind=rp), dimension(n), intent(inout) :: fz
301  type(coef_t), intent(inout) :: coef
302  type(bc_list_t), intent(inout) :: blstx
303  type(bc_list_t), intent(inout) :: blsty
304  type(bc_list_t), intent(inout) :: blstz
305  type(gs_t), intent(inout) :: gs_h
306  type(ksp_monitor_t), dimension(3) :: ksp_results
307  integer, optional, intent(in) :: niter
308 
309  ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
310  ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
311  ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
312 
313  end function cheby_device_solve_coupled
314 
315 end module cheby_device
316 
317 
Map a Fortran array to a device (allocate and associate)
Definition: device.F90:57
Copy data between host and device (or device and device)
Definition: device.F90:51
Defines a Matrix-vector product.
Definition: ax.f90:34
Defines a boundary condition.
Definition: bc.f90:34
Chebyshev preconditioner.
subroutine cheby_device_power(this, Ax, x, n, coef, blst, gs_h)
subroutine cheby_device_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
Initialise a standard solver.
subroutine cheby_device_free(this)
type(ksp_monitor_t) function, dimension(3) cheby_device_solve_coupled(this, Ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard Cheby_Deviceshev coupled solve.
type(ksp_monitor_t) function cheby_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter)
A chebyshev preconditioner.
Coefficients.
Definition: coef.f90:34
subroutine, public device_add2s1(a_d, b_d, c1, n)
subroutine, public device_add2s2(a_d, b_d, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_cmult2(a_d, b_d, c, n)
Multiplication by constant c .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
subroutine, public device_sub2(a_d, b_d, n)
Vector substraction .
Device abstraction, common interface for various accelerators.
Definition: device.F90:34
integer, parameter, public host_to_device
Definition: device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition: device.F90:185
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition: device.F90:1191
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition: device.F90:1161
Defines a field.
Definition: field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition: krylov.f90:34
Defines a mesh.
Definition: mesh.f90:34
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Krylov preconditioner.
Definition: precon.f90:34
Defines a function space.
Definition: space.f90:34
Base type for a matrix-vector product providing .
Definition: ax.f90:43
A list of boundary conditions.
Definition: bc.f90:104
Defines a Chebyshev preconditioner.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition: coef.f90:55
Type for storing initial and final residuals in a Krylov solver.
Definition: krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition: krylov.f90:66
Defines a canonical Krylov preconditioner.
Definition: precon.f90:40
The function space for the SEM solution fields.
Definition: space.f90:62