Neko  0.8.99
A portable framework for high-order spectral element flow simulations
cg_device.f90
Go to the documentation of this file.
1 ! Copyright (c) 2021-2024, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34 module cg_device
35  use num_types, only: rp
37  use precon, only : pc_t
38  use ax_product, only : ax_t
39  use field, only : field_t
40  use coefs, only : coef_t
41  use gather_scatter, only : gs_t, gs_op_add
42  use bc, only : bc_list_t, bc_list_apply
43  use math, only : abscmp
44  use device
47  implicit none
48 
50  type, public, extends(ksp_t) :: cg_device_t
51  real(kind=rp), allocatable :: w(:)
52  real(kind=rp), allocatable :: r(:)
53  real(kind=rp), allocatable :: p(:)
54  real(kind=rp), allocatable :: z(:)
55  type(c_ptr) :: w_d = c_null_ptr
56  type(c_ptr) :: r_d = c_null_ptr
57  type(c_ptr) :: p_d = c_null_ptr
58  type(c_ptr) :: z_d = c_null_ptr
59  type(c_ptr) :: gs_event = c_null_ptr
60  contains
61  procedure, pass(this) :: init => cg_device_init
62  procedure, pass(this) :: free => cg_device_free
63  procedure, pass(this) :: solve => cg_device_solve
64  procedure, pass(this) :: solve_coupled => cg_device_solve_coupled
65  end type cg_device_t
66 
67 contains
68 
70  subroutine cg_device_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
71  class(cg_device_t), intent(inout) :: this
72  class(pc_t), optional, intent(inout), target :: M
73  integer, intent(in) :: n
74  integer, intent(in) :: max_iter
75  real(kind=rp), optional, intent(inout) :: rel_tol
76  real(kind=rp), optional, intent(inout) :: abs_tol
77  logical, optional, intent(in) :: monitor
78 
79  call this%free()
80 
81  allocate(this%w(n))
82  allocate(this%r(n))
83  allocate(this%p(n))
84  allocate(this%z(n))
85 
86  call device_map(this%z, this%z_d, n)
87  call device_map(this%p, this%p_d, n)
88  call device_map(this%r, this%r_d, n)
89  call device_map(this%w, this%w_d, n)
90 
91  if (present(m)) then
92  this%M => m
93  end if
94 
95  if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
96  call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
97  else if (present(rel_tol) .and. present(abs_tol)) then
98  call this%ksp_init(max_iter, rel_tol, abs_tol)
99  else if (present(monitor) .and. present(abs_tol)) then
100  call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
101  else if (present(rel_tol) .and. present(monitor)) then
102  call this%ksp_init(max_iter, rel_tol, monitor = monitor)
103  else if (present(rel_tol)) then
104  call this%ksp_init(max_iter, rel_tol = rel_tol)
105  else if (present(abs_tol)) then
106  call this%ksp_init(max_iter, abs_tol = abs_tol)
107  else if (present(monitor)) then
108  call this%ksp_init(max_iter, monitor = monitor)
109  else
110  call this%ksp_init(max_iter)
111  end if
112 
113  call device_event_create(this%gs_event, 2)
114  end subroutine cg_device_init
115 
117  subroutine cg_device_free(this)
118  class(cg_device_t), intent(inout) :: this
119 
120  call this%ksp_free()
121 
122  if (allocated(this%w)) then
123  deallocate(this%w)
124  end if
125 
126  if (allocated(this%r)) then
127  deallocate(this%r)
128  end if
129 
130  if (allocated(this%p)) then
131  deallocate(this%p)
132  end if
133 
134  if (allocated(this%z)) then
135  deallocate(this%z)
136  end if
137 
138  nullify(this%M)
139 
140  if (c_associated(this%w_d)) then
141  call device_free(this%w_d)
142  end if
143 
144  if (c_associated(this%r_d)) then
145  call device_free(this%r_d)
146  end if
147 
148  if (c_associated(this%p_d)) then
149  call device_free(this%p_d)
150  end if
151 
152  if (c_associated(this%z_d)) then
153  call device_free(this%z_d)
154  end if
155 
156  if (c_associated(this%gs_event)) then
157  call device_event_destroy(this%gs_event)
158  end if
159 
160  end subroutine cg_device_free
161 
163  function cg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
164  class(cg_device_t), intent(inout) :: this
165  class(ax_t), intent(inout) :: ax
166  type(field_t), intent(inout) :: x
167  integer, intent(in) :: n
168  real(kind=rp), dimension(n), intent(inout) :: f
169  type(coef_t), intent(inout) :: coef
170  type(bc_list_t), intent(inout) :: blst
171  type(gs_t), intent(inout) :: gs_h
172  type(ksp_monitor_t) :: ksp_results
173  integer, optional, intent(in) :: niter
174  real(kind=rp), parameter :: one = 1.0
175  real(kind=rp), parameter :: zero = 0.0
176  integer :: iter, max_iter
177  real(kind=rp) :: rnorm, rtr, rtr0, rtz2, rtz1
178  real(kind=rp) :: beta, pap, alpha, alphm, norm_fac
179  type(c_ptr) :: f_d
180 
181  f_d = device_get_ptr(f)
182 
183  if (present(niter)) then
184  max_iter = niter
185  else
186  max_iter = this%max_iter
187  end if
188  norm_fac = one/sqrt(coef%volume)
189 
190  rtz1 = one
191  call device_rzero(x%x_d, n)
192  call device_rzero(this%p_d, n)
193  call device_copy(this%r_d, f_d, n)
194 
195  rtr = device_glsc3(this%r_d, coef%mult_d, this%r_d, n)
196  rnorm = sqrt(rtr)*norm_fac
197  ksp_results%res_start = rnorm
198  ksp_results%res_final = rnorm
199  ksp_results%iter = 0
200  if(abscmp(rnorm, zero)) return
201  call this%monitor_start('CG')
202  do iter = 1, max_iter
203  call this%M%solve(this%z, this%r, n)
204  rtz2 = rtz1
205  rtz1 = device_glsc3(this%r_d, coef%mult_d, this%z_d, n)
206  beta = rtz1 / rtz2
207  if (iter .eq. 1) beta = zero
208  call device_add2s1(this%p_d, this%z_d, beta, n)
209 
210  call ax%compute(this%w, this%p, coef, x%msh, x%Xh)
211  call gs_h%op(this%w, n, gs_op_add, this%gs_event)
212  call device_event_sync(this%gs_event)
213  call bc_list_apply(blst, this%w, n)
214 
215  pap = device_glsc3(this%w_d, coef%mult_d, this%p_d, n)
216 
217  alpha = rtz1 / pap
218  alphm = -alpha
219  call device_add2s2(x%x_d, this%p_d, alpha, n)
220  call device_add2s2(this%r_d, this%w_d, alphm, n)
221 
222  rtr = device_glsc3(this%r_d, coef%mult_d, this%r_d, n)
223  if (iter .eq. 1) rtr0 = rtr
224  rnorm = sqrt(rtr)*norm_fac
225  call this%monitor_iter(iter, rnorm)
226  if (rnorm .lt. this%abs_tol) then
227  exit
228  end if
229  end do
230  call this%monitor_stop()
231  ksp_results%res_final = rnorm
232  ksp_results%iter = iter
233 
234  end function cg_device_solve
235 
237  function cg_device_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
238  n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
239  class(cg_device_t), intent(inout) :: this
240  class(ax_t), intent(inout) :: ax
241  type(field_t), intent(inout) :: x
242  type(field_t), intent(inout) :: y
243  type(field_t), intent(inout) :: z
244  integer, intent(in) :: n
245  real(kind=rp), dimension(n), intent(inout) :: fx
246  real(kind=rp), dimension(n), intent(inout) :: fy
247  real(kind=rp), dimension(n), intent(inout) :: fz
248  type(coef_t), intent(inout) :: coef
249  type(bc_list_t), intent(inout) :: blstx
250  type(bc_list_t), intent(inout) :: blsty
251  type(bc_list_t), intent(inout) :: blstz
252  type(gs_t), intent(inout) :: gs_h
253  type(ksp_monitor_t), dimension(3) :: ksp_results
254  integer, optional, intent(in) :: niter
255 
256  ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
257  ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
258  ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
259 
260  end function cg_device_solve_coupled
261 
262 end module cg_device
263 
264 
Return the device pointer for an associated Fortran array.
Definition: device.F90:81
Map a Fortran array to a device (allocate and associate)
Definition: device.F90:57
Defines a Matrix-vector product.
Definition: ax.f90:34
Defines a boundary condition.
Definition: bc.f90:34
Defines various Conjugate Gradient methods for accelerators.
Definition: cg_device.f90:34
subroutine cg_device_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
Initialise a device based PCG solver.
Definition: cg_device.f90:71
type(ksp_monitor_t) function cg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter)
Standard PCG solve.
Definition: cg_device.f90:164
subroutine cg_device_free(this)
Deallocate a device based PCG solver.
Definition: cg_device.f90:118
type(ksp_monitor_t) function, dimension(3) cg_device_solve_coupled(this, Ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard PCG coupled solve.
Definition: cg_device.f90:239
Coefficients.
Definition: coef.f90:34
subroutine, public device_add2s1(a_d, b_d, c1, n)
subroutine, public device_rzero(a_d, n)
Zero a real vector.
subroutine, public device_add2s2(a_d, b_d, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
Device abstraction, common interface for various accelerators.
Definition: device.F90:34
subroutine, public device_event_sync(event)
Synchronize an event.
Definition: device.F90:1229
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition: device.F90:185
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition: device.F90:1191
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition: device.F90:1161
Defines a field.
Definition: field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition: krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition: krylov.f90:51
Definition: math.f90:60
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Krylov preconditioner.
Definition: precon.f90:34
Base type for a matrix-vector product providing .
Definition: ax.f90:43
A list of boundary conditions.
Definition: bc.f90:104
Device based preconditioned conjugate gradient method.
Definition: cg_device.f90:50
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition: coef.f90:55
Type for storing initial and final residuals in a Krylov solver.
Definition: krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition: krylov.f90:66
Defines a canonical Krylov preconditioner.
Definition: precon.f90:40