Neko  0.8.1
A portable framework for high-order spectral element flow simulations
cg_device.f90
Go to the documentation of this file.
1 ! Copyright (c) 2021-2024, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34 module cg_device
35  use num_types, only: rp
37  use precon, only : pc_t
38  use ax_product, only : ax_t
39  use field, only : field_t
40  use coefs, only : coef_t
41  use gather_scatter, only : gs_t, gs_op_add
42  use bc, only : bc_list_t, bc_list_apply
43  use math, only : abscmp
44  use device
47  implicit none
48 
!> Device based preconditioned conjugate gradient method.
!! Every work vector exists twice: as a host array and as the handle that
!! `device_map` associates with it in `cg_device_init`. A handle equal to
!! `c_null_ptr` means that nothing has been mapped yet.
type, public, extends(ksp_t) :: cg_device_t
  !> Receives the matrix-vector product applied to `p`.
  real(kind=rp), allocatable, dimension(:) :: w
  !> Residual.
  real(kind=rp), allocatable, dimension(:) :: r
  !> Search direction.
  real(kind=rp), allocatable, dimension(:) :: p
  !> Preconditioned residual (output of the preconditioner solve).
  real(kind=rp), allocatable, dimension(:) :: z
  !> Device handle mapped to `w`.
  type(c_ptr) :: w_d = c_null_ptr
  !> Device handle mapped to `r`.
  type(c_ptr) :: r_d = c_null_ptr
  !> Device handle mapped to `p`.
  type(c_ptr) :: p_d = c_null_ptr
  !> Device handle mapped to `z`.
  type(c_ptr) :: z_d = c_null_ptr
  !> Event passed to the gather-scatter operation and synchronised on
  !! right after it.
  type(c_ptr) :: gs_event = c_null_ptr
contains
  !> Allocate and map the work vectors, set up the base solver.
  procedure, pass(this) :: init => cg_device_init
  !> Release host arrays, device handles and the event.
  procedure, pass(this) :: free => cg_device_free
  !> Run the preconditioned conjugate gradient iteration.
  procedure, pass(this) :: solve => cg_device_solve
end type cg_device_t
65 
66 contains
67 
!> Initialise a device based PCG solver.
!! @param n        Length of each work vector.
!! @param max_iter Iteration limit forwarded to `ksp_init`.
!! @param M        Optional preconditioner; the solver keeps a pointer to it.
!! @param rel_tol  Optional relative tolerance forwarded to `ksp_init`.
!! @param abs_tol  Optional absolute tolerance forwarded to `ksp_init`.
subroutine cg_device_init(this, n, max_iter, M, rel_tol, abs_tol)
  class(cg_device_t), intent(inout) :: this
  class(pc_t), optional, intent(inout), target :: M
  integer, intent(in) :: n
  integer, intent(in) :: max_iter
  real(kind=rp), optional, intent(inout) :: rel_tol
  real(kind=rp), optional, intent(inout) :: abs_tol

  ! Start from a clean object so that the solver can be re-initialised.
  call this%free()

  ! Host side work vectors.
  allocate(this%w(n), this%r(n), this%p(n), this%z(n))

  ! Matching device side storage, one handle per host vector.
  call device_map(this%z, this%z_d, n)
  call device_map(this%p, this%p_d, n)
  call device_map(this%r, this%r_d, n)
  call device_map(this%w, this%w_d, n)

  if (present(M)) this%M => M

  ! Hand on exactly those tolerances the caller supplied.
  if (present(rel_tol)) then
    if (present(abs_tol)) then
      call this%ksp_init(max_iter, rel_tol, abs_tol)
    else
      call this%ksp_init(max_iter, rel_tol=rel_tol)
    end if
  else if (present(abs_tol)) then
    call this%ksp_init(max_iter, abs_tol=abs_tol)
  else
    call this%ksp_init(max_iter)
  end if

  ! NOTE(review): the flag value 2 is passed on as is; its meaning is
  ! defined by device_event_create - confirm against the device module.
  call device_event_create(this%gs_event, 2)
end subroutine cg_device_init
106 
!> Deallocate a device based PCG solver.
!! Releases the base solver state, the host work vectors, the device
!! handles and the gather-scatter event. Every handle is reset to
!! `c_null_ptr` once released, so calling this routine again (directly or
!! through `cg_device_init`, which calls it first) cannot release the same
!! handle twice.
subroutine cg_device_free(this)
  class(cg_device_t), intent(inout) :: this

  call this%ksp_free()

  ! Host side work vectors.
  if (allocated(this%w)) deallocate(this%w)
  if (allocated(this%r)) deallocate(this%r)
  if (allocated(this%p)) deallocate(this%p)
  if (allocated(this%z)) deallocate(this%z)

  ! The preconditioner is only referenced, never owned, by the solver.
  nullify(this%M)

  ! Device side storage. The explicit reset does not rely on device_free
  ! clearing its argument.
  if (c_associated(this%w_d)) then
    call device_free(this%w_d)
    this%w_d = c_null_ptr
  end if

  if (c_associated(this%r_d)) then
    call device_free(this%r_d)
    this%r_d = c_null_ptr
  end if

  if (c_associated(this%p_d)) then
    call device_free(this%p_d)
    this%p_d = c_null_ptr
  end if

  if (c_associated(this%z_d)) then
    call device_free(this%z_d)
    this%z_d = c_null_ptr
  end if

  ! Same reasoning for the event handle.
  if (c_associated(this%gs_event)) then
    call device_event_destroy(this%gs_event)
    this%gs_event = c_null_ptr
  end if

end subroutine cg_device_free
152 
!> Standard PCG solve.
!! Runs the preconditioned conjugate gradient iteration from a zero initial
!! guess; the device data of `x` is overwritten with the result.
!! @param Ax    Matrix-vector product operator.
!! @param x     Solution field; `x%x_d` is zeroed and then updated.
!! @param f     Right-hand side; its device pointer is looked up with
!!              `device_get_ptr`, so it has to be associated with device
!!              storage already.
!! @param n     Length of the vectors.
!! @param coef  Coefficients; `volume` and `mult_d` are used for the norms.
!! @param blst  Boundary conditions applied to the operator result.
!! @param gs_h  Gather-scatter handle used to sum the operator result.
!! @param niter Optional iteration limit overriding `this%max_iter`.
!! @return Initial and final residual norm and the number of iterations
!!         that were carried out.
function cg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
  class(cg_device_t), intent(inout) :: this
  class(ax_t), intent(inout) :: Ax
  type(field_t), intent(inout) :: x
  integer, intent(in) :: n
  real(kind=rp), dimension(n), intent(inout) :: f
  type(coef_t), intent(inout) :: coef
  type(bc_list_t), intent(inout) :: blst
  type(gs_t), intent(inout) :: gs_h
  type(ksp_monitor_t) :: ksp_results
  integer, optional, intent(in) :: niter
  real(kind=rp), parameter :: one = 1.0_rp
  real(kind=rp), parameter :: zero = 0.0_rp
  integer :: iter, max_iter
  real(kind=rp) :: rnorm, rtr, rtz2, rtz1
  real(kind=rp) :: beta, pap, alpha, alphm, norm_fac
  type(c_ptr) :: f_d

  f_d = device_get_ptr(f)

  if (present(niter)) then
    max_iter = niter
  else
    max_iter = this%max_iter
  end if
  norm_fac = one / sqrt(coef%volume)

  ! Zero initial guess, so the initial residual equals the right-hand side.
  rtz1 = one
  call device_rzero(x%x_d, n)
  call device_rzero(this%p_d, n)
  call device_copy(this%r_d, f_d, n)

  rtr = device_glsc3(this%r_d, coef%mult_d, this%r_d, n)
  rnorm = sqrt(rtr) * norm_fac
  ksp_results%res_start = rnorm
  ksp_results%res_final = rnorm
  ksp_results%iter = 0
  ! Nothing to solve for a vanishing right-hand side.
  if (abscmp(rnorm, zero)) return

  do iter = 1, max_iter
    ! z = M^{-1} r
    call this%M%solve(this%z, this%r, n)
    rtz2 = rtz1
    rtz1 = device_glsc3(this%r_d, coef%mult_d, this%z_d, n)

    ! The first search direction is the preconditioned residual itself.
    beta = rtz1 / rtz2
    if (iter .eq. 1) beta = zero
    call device_add2s1(this%p_d, this%z_d, beta, n)

    ! w = A p, summed over shared degrees of freedom; the event makes sure
    ! the gather-scatter has finished before w is used.
    call Ax%compute(this%w, this%p, coef, x%msh, x%Xh)
    call gs_h%op(this%w, n, gs_op_add, this%gs_event)
    call device_event_sync(this%gs_event)
    call bc_list_apply(blst, this%w, n)

    pap = device_glsc3(this%w_d, coef%mult_d, this%p_d, n)

    ! x = x + alpha p,  r = r - alpha w
    alpha = rtz1 / pap
    alphm = -alpha
    call device_add2s2(x%x_d, this%p_d, alpha, n)
    call device_add2s2(this%r_d, this%w_d, alphm, n)

    rtr = device_glsc3(this%r_d, coef%mult_d, this%r_d, n)
    rnorm = sqrt(rtr) * norm_fac
    if (rnorm .lt. this%abs_tol) then
      exit
    end if
  end do

  ksp_results%res_final = rnorm
  ! A loop that runs to completion leaves iter at max_iter + 1; clamp it so
  ! that the number of iterations actually performed is reported.
  ksp_results%iter = min(iter, max(max_iter, 0))

end function cg_device_solve
223 
224 end module cg_device
225 
226 
Return the device pointer for an associated Fortran array.
Definition: device.F90:81
Map a Fortran array to a device (allocate and associate)
Definition: device.F90:57
Defines a Matrix-vector product.
Definition: ax.f90:34
Defines a boundary condition.
Definition: bc.f90:34
Defines various Conjugate Gradient methods for accelerators.
Definition: cg_device.f90:34
type(ksp_monitor_t) function cg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter)
Standard PCG solve.
Definition: cg_device.f90:155
subroutine cg_device_init(this, n, max_iter, M, rel_tol, abs_tol)
Initialise a device based PCG solver.
Definition: cg_device.f90:70
subroutine cg_device_free(this)
Deallocate a device based PCG solver.
Definition: cg_device.f90:109
Coefficients.
Definition: coef.f90:34
subroutine, public device_add2s1(a_d, b_d, c1, n)
subroutine, public device_rzero(a_d, n)
subroutine, public device_add2s2(a_d, b_d, c1, n)
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
subroutine, public device_copy(a_d, b_d, n)
Device abstraction, common interface for various accelerators.
Definition: device.F90:34
subroutine, public device_event_sync(event)
Synchronize an event.
Definition: device.F90:1209
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition: device.F90:172
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition: device.F90:1172
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition: device.F90:1142
Defines a field.
Definition: field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition: krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition: krylov.f90:50
Definition: math.f90:60
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Krylov preconditioner.
Definition: precon.f90:34
Base type for a matrix-vector product providing $Ax$.
Definition: ax.f90:43
A list of boundary conditions.
Definition: bc.f90:102
Device based preconditioned conjugate gradient method.
Definition: cg_device.f90:50
Coefficients defined on a given (mesh, $X_h$) tuple. Arrays use indices (i,j,k,e): element e,...
Definition: coef.f90:54
Type for storing initial and final residuals in a Krylov solver.
Definition: krylov.f90:55
Base abstract type for a canonical Krylov method, solving $Ax = f$.
Definition: krylov.f90:65
Defines a canonical Krylov preconditioner.
Definition: precon.f90:40