Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
cg_device.f90
Go to the documentation of this file.
1! Copyright (c) 2021-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use num_types, only: rp
37 use precon, only : pc_t
38 use ax_product, only : ax_t
39 use field, only : field_t
40 use coefs, only : coef_t
41 use gather_scatter, only : gs_t, gs_op_add
42 use bc_list, only : bc_list_t
43 use math, only : abscmp
44 use device
47 use, intrinsic :: iso_c_binding, only : c_ptr, c_null_ptr, c_associated
48 implicit none
49
51 type, public, extends(ksp_t) :: cg_device_t
52 real(kind=rp), allocatable :: w(:)
53 real(kind=rp), allocatable :: r(:)
54 real(kind=rp), allocatable :: p(:)
55 real(kind=rp), allocatable :: z(:)
56 type(c_ptr) :: w_d = c_null_ptr
57 type(c_ptr) :: r_d = c_null_ptr
58 type(c_ptr) :: p_d = c_null_ptr
59 type(c_ptr) :: z_d = c_null_ptr
60 type(c_ptr) :: gs_event = c_null_ptr
61 contains
62 procedure, pass(this) :: init => cg_device_init
63 procedure, pass(this) :: free => cg_device_free
64 procedure, pass(this) :: solve => cg_device_solve
65 procedure, pass(this) :: solve_coupled => cg_device_solve_coupled
66 end type cg_device_t
67
68contains
69
71 subroutine cg_device_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
72 class(cg_device_t), target, intent(inout) :: this
73 class(pc_t), optional, intent(in), target :: M
74 integer, intent(in) :: n
75 integer, intent(in) :: max_iter
76 real(kind=rp), optional, intent(in) :: rel_tol
77 real(kind=rp), optional, intent(in) :: abs_tol
78 logical, optional, intent(in) :: monitor
79
80 call this%free()
81
82 allocate(this%w(n))
83 allocate(this%r(n))
84 allocate(this%p(n))
85 allocate(this%z(n))
86
87 call device_map(this%z, this%z_d, n)
88 call device_map(this%p, this%p_d, n)
89 call device_map(this%r, this%r_d, n)
90 call device_map(this%w, this%w_d, n)
91
92 if (present(m)) then
93 this%M => m
94 end if
95
96 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
97 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
98 else if (present(rel_tol) .and. present(abs_tol)) then
99 call this%ksp_init(max_iter, rel_tol, abs_tol)
100 else if (present(monitor) .and. present(abs_tol)) then
101 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
102 else if (present(rel_tol) .and. present(monitor)) then
103 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
104 else if (present(rel_tol)) then
105 call this%ksp_init(max_iter, rel_tol = rel_tol)
106 else if (present(abs_tol)) then
107 call this%ksp_init(max_iter, abs_tol = abs_tol)
108 else if (present(monitor)) then
109 call this%ksp_init(max_iter, monitor = monitor)
110 else
111 call this%ksp_init(max_iter)
112 end if
113
114 call device_event_create(this%gs_event, 2)
115 end subroutine cg_device_init
116
118 subroutine cg_device_free(this)
119 class(cg_device_t), intent(inout) :: this
120
121 call this%ksp_free()
122
123 if (allocated(this%w)) then
124 if (c_associated(this%w_d)) then
125 call device_unmap(this%w, this%w_d)
126 end if
127 deallocate(this%w)
128 end if
129
130 if (allocated(this%r)) then
131 if (c_associated(this%r_d)) then
132 call device_unmap(this%r, this%r_d)
133 end if
134 deallocate(this%r)
135 end if
136
137 if (allocated(this%p)) then
138 if (c_associated(this%p_d)) then
139 call device_unmap(this%p, this%p_d)
140 end if
141 deallocate(this%p)
142 end if
143
144 if (allocated(this%z)) then
145 if (c_associated(this%z_d)) then
146 call device_unmap(this%z, this%z_d)
147 end if
148 deallocate(this%z)
149 end if
150
151 nullify(this%M)
152
153 if (c_associated(this%gs_event)) then
154 call device_event_destroy(this%gs_event)
155 end if
156
157 end subroutine cg_device_free
158
160 function cg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
161 class(cg_device_t), intent(inout) :: this
162 class(ax_t), intent(in) :: ax
163 type(field_t), intent(inout) :: x
164 integer, intent(in) :: n
165 real(kind=rp), dimension(n), intent(in) :: f
166 type(coef_t), intent(inout) :: coef
167 type(bc_list_t), intent(inout) :: blst
168 type(gs_t), intent(inout) :: gs_h
169 type(ksp_monitor_t) :: ksp_results
170 integer, optional, intent(in) :: niter
171 real(kind=rp), parameter :: one = 1.0
172 real(kind=rp), parameter :: zero = 0.0
173 integer :: iter, max_iter
174 real(kind=rp) :: rnorm, rtr, rtr0, rtz2, rtz1
175 real(kind=rp) :: beta, pap, alpha, alphm, norm_fac
176 type(c_ptr) :: f_d
177
178 f_d = device_get_ptr(f)
179
180 if (present(niter)) then
181 max_iter = niter
182 else
183 max_iter = this%max_iter
184 end if
185 norm_fac = one/sqrt(coef%volume)
186
187 rtz1 = one
188 call device_rzero(x%x_d, n)
189 call device_rzero(this%p_d, n)
190 call device_copy(this%r_d, f_d, n)
191
192 rtr = device_glsc3(this%r_d, coef%mult_d, this%r_d, n)
193 rnorm = sqrt(rtr)*norm_fac
194 ksp_results%res_start = rnorm
195 ksp_results%res_final = rnorm
196 ksp_results%iter = 0
197 if(abscmp(rnorm, zero)) then
198 ksp_results%converged = .true.
199 return
200 end if
201 call this%monitor_start('CG')
202 do iter = 1, max_iter
203 call this%M%solve(this%z, this%r, n)
204 rtz2 = rtz1
205 rtz1 = device_glsc3(this%r_d, coef%mult_d, this%z_d, n)
206 beta = rtz1 / rtz2
207 if (iter .eq. 1) beta = zero
208 call device_add2s1(this%p_d, this%z_d, beta, n)
209
210 call ax%compute(this%w, this%p, coef, x%msh, x%Xh)
211 call gs_h%op(this%w, n, gs_op_add, this%gs_event)
212 call device_event_sync(this%gs_event)
213 call blst%apply(this%w, n)
214
215 pap = device_glsc3(this%w_d, coef%mult_d, this%p_d, n)
216
217 alpha = rtz1 / pap
218 alphm = -alpha
219 call device_add2s2(x%x_d, this%p_d, alpha, n)
220 call device_add2s2(this%r_d, this%w_d, alphm, n)
221
222 rtr = device_glsc3(this%r_d, coef%mult_d, this%r_d, n)
223 if (iter .eq. 1) rtr0 = rtr
224 rnorm = sqrt(rtr)*norm_fac
225 call this%monitor_iter(iter, rnorm)
226 if (rnorm .lt. this%abs_tol) then
227 exit
228 end if
229 end do
230 call this%monitor_stop()
231 ksp_results%res_final = rnorm
232 ksp_results%iter = iter
233 ksp_results%converged = this%is_converged(iter, rnorm)
234
235 end function cg_device_solve
236
238 function cg_device_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
239 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
240 class(cg_device_t), intent(inout) :: this
241 class(ax_t), intent(in) :: ax
242 type(field_t), intent(inout) :: x
243 type(field_t), intent(inout) :: y
244 type(field_t), intent(inout) :: z
245 integer, intent(in) :: n
246 real(kind=rp), dimension(n), intent(in) :: fx
247 real(kind=rp), dimension(n), intent(in) :: fy
248 real(kind=rp), dimension(n), intent(in) :: fz
249 type(coef_t), intent(inout) :: coef
250 type(bc_list_t), intent(inout) :: blstx
251 type(bc_list_t), intent(inout) :: blsty
252 type(bc_list_t), intent(inout) :: blstz
253 type(gs_t), intent(inout) :: gs_h
254 type(ksp_monitor_t), dimension(3) :: ksp_results
255 integer, optional, intent(in) :: niter
256
257 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
258 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
259 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
260
261 end function cg_device_solve_coupled
262
263end module cg_device
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Return the device pointer for an associated Fortran array.
Definition device.F90:108
Map a Fortran array to a device (allocate and associate)
Definition device.F90:78
Unmap a Fortran array from a device (deassociate and free)
Definition device.F90:84
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Defines various Conjugate Gradient methods for accelerators.
Definition cg_device.f90:34
type(ksp_monitor_t) function cg_device_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
Standard PCG solve.
subroutine cg_device_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a device based PCG solver.
Definition cg_device.f90:72
subroutine cg_device_free(this)
Deallocate a device based PCG solver.
type(ksp_monitor_t) function, dimension(3) cg_device_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard PCG coupled solve.
Coefficients.
Definition coef.f90:34
subroutine, public device_add2s1(a_d, b_d, c1, n, strm)
subroutine, public device_add2s2(a_d, b_d, c1, n, strm)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1594
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1550
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1516
Defines a field.
Definition field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition krylov.f90:51
Definition math.f90:60
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Krylov preconditioner.
Definition precon.f90:34
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:49
Device based preconditioned conjugate gradient method.
Definition cg_device.f90:51
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:63
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:73
Defines a canonical Krylov preconditioner.
Definition precon.f90:40