Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
cg_device.f90
Go to the documentation of this file.
1! Copyright (c) 2021-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use num_types, only: rp
37 use precon, only : pc_t
38 use ax_product, only : ax_t
39 use field, only : field_t
40 use coefs, only : coef_t
41 use gather_scatter, only : gs_t, gs_op_add
42 use bc_list, only : bc_list_t
43 use math, only : abscmp
44 use device
47 use, intrinsic :: iso_c_binding, only : c_ptr, c_null_ptr, c_associated
48 implicit none
49
51 type, public, extends(ksp_t) :: cg_device_t
52 real(kind=rp), allocatable :: w(:)
53 real(kind=rp), allocatable :: r(:)
54 real(kind=rp), allocatable :: p(:)
55 real(kind=rp), allocatable :: z(:)
56 type(c_ptr) :: w_d = c_null_ptr
57 type(c_ptr) :: r_d = c_null_ptr
58 type(c_ptr) :: p_d = c_null_ptr
59 type(c_ptr) :: z_d = c_null_ptr
60 type(c_ptr) :: gs_event = c_null_ptr
61 contains
62 procedure, pass(this) :: init => cg_device_init
63 procedure, pass(this) :: free => cg_device_free
64 procedure, pass(this) :: solve => cg_device_solve
65 procedure, pass(this) :: solve_coupled => cg_device_solve_coupled
66 end type cg_device_t
67
68contains
69
71 subroutine cg_device_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
72 class(cg_device_t), target, intent(inout) :: this
73 class(pc_t), optional, intent(in), target :: M
74 integer, intent(in) :: n
75 integer, intent(in) :: max_iter
76 real(kind=rp), optional, intent(in) :: rel_tol
77 real(kind=rp), optional, intent(in) :: abs_tol
78 logical, optional, intent(in) :: monitor
79
80 call this%free()
81
82 allocate(this%w(n))
83 allocate(this%r(n))
84 allocate(this%p(n))
85 allocate(this%z(n))
86
87 call device_map(this%z, this%z_d, n)
88 call device_map(this%p, this%p_d, n)
89 call device_map(this%r, this%r_d, n)
90 call device_map(this%w, this%w_d, n)
91
92 if (present(m)) then
93 this%M => m
94 end if
95
96 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
97 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
98 else if (present(rel_tol) .and. present(abs_tol)) then
99 call this%ksp_init(max_iter, rel_tol, abs_tol)
100 else if (present(monitor) .and. present(abs_tol)) then
101 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
102 else if (present(rel_tol) .and. present(monitor)) then
103 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
104 else if (present(rel_tol)) then
105 call this%ksp_init(max_iter, rel_tol = rel_tol)
106 else if (present(abs_tol)) then
107 call this%ksp_init(max_iter, abs_tol = abs_tol)
108 else if (present(monitor)) then
109 call this%ksp_init(max_iter, monitor = monitor)
110 else
111 call this%ksp_init(max_iter)
112 end if
113
114 call device_event_create(this%gs_event, 2)
115 end subroutine cg_device_init
116
118 subroutine cg_device_free(this)
119 class(cg_device_t), intent(inout) :: this
120
121 call this%ksp_free()
122
123 if (allocated(this%w)) then
124 deallocate(this%w)
125 end if
126
127 if (allocated(this%r)) then
128 deallocate(this%r)
129 end if
130
131 if (allocated(this%p)) then
132 deallocate(this%p)
133 end if
134
135 if (allocated(this%z)) then
136 deallocate(this%z)
137 end if
138
139 nullify(this%M)
140
141 if (c_associated(this%w_d)) then
142 call device_free(this%w_d)
143 end if
144
145 if (c_associated(this%r_d)) then
146 call device_free(this%r_d)
147 end if
148
149 if (c_associated(this%p_d)) then
150 call device_free(this%p_d)
151 end if
152
153 if (c_associated(this%z_d)) then
154 call device_free(this%z_d)
155 end if
156
157 if (c_associated(this%gs_event)) then
158 call device_event_destroy(this%gs_event)
159 end if
160
161 end subroutine cg_device_free
162
164 function cg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
165 class(cg_device_t), intent(inout) :: this
166 class(ax_t), intent(in) :: ax
167 type(field_t), intent(inout) :: x
168 integer, intent(in) :: n
169 real(kind=rp), dimension(n), intent(in) :: f
170 type(coef_t), intent(inout) :: coef
171 type(bc_list_t), intent(inout) :: blst
172 type(gs_t), intent(inout) :: gs_h
173 type(ksp_monitor_t) :: ksp_results
174 integer, optional, intent(in) :: niter
175 real(kind=rp), parameter :: one = 1.0
176 real(kind=rp), parameter :: zero = 0.0
177 integer :: iter, max_iter
178 real(kind=rp) :: rnorm, rtr, rtr0, rtz2, rtz1
179 real(kind=rp) :: beta, pap, alpha, alphm, norm_fac
180 type(c_ptr) :: f_d
181
182 f_d = device_get_ptr(f)
183
184 if (present(niter)) then
185 max_iter = niter
186 else
187 max_iter = this%max_iter
188 end if
189 norm_fac = one/sqrt(coef%volume)
190
191 rtz1 = one
192 call device_rzero(x%x_d, n)
193 call device_rzero(this%p_d, n)
194 call device_copy(this%r_d, f_d, n)
195
196 rtr = device_glsc3(this%r_d, coef%mult_d, this%r_d, n)
197 rnorm = sqrt(rtr)*norm_fac
198 ksp_results%res_start = rnorm
199 ksp_results%res_final = rnorm
200 ksp_results%iter = 0
201 if(abscmp(rnorm, zero)) then
202 ksp_results%converged = .true.
203 return
204 end if
205 call this%monitor_start('CG')
206 do iter = 1, max_iter
207 call this%M%solve(this%z, this%r, n)
208 rtz2 = rtz1
209 rtz1 = device_glsc3(this%r_d, coef%mult_d, this%z_d, n)
210 beta = rtz1 / rtz2
211 if (iter .eq. 1) beta = zero
212 call device_add2s1(this%p_d, this%z_d, beta, n)
213
214 call ax%compute(this%w, this%p, coef, x%msh, x%Xh)
215 call gs_h%op(this%w, n, gs_op_add, this%gs_event)
216 call device_event_sync(this%gs_event)
217 call blst%apply(this%w, n)
218
219 pap = device_glsc3(this%w_d, coef%mult_d, this%p_d, n)
220
221 alpha = rtz1 / pap
222 alphm = -alpha
223 call device_add2s2(x%x_d, this%p_d, alpha, n)
224 call device_add2s2(this%r_d, this%w_d, alphm, n)
225
226 rtr = device_glsc3(this%r_d, coef%mult_d, this%r_d, n)
227 if (iter .eq. 1) rtr0 = rtr
228 rnorm = sqrt(rtr)*norm_fac
229 call this%monitor_iter(iter, rnorm)
230 if (rnorm .lt. this%abs_tol) then
231 exit
232 end if
233 end do
234 call this%monitor_stop()
235 ksp_results%res_final = rnorm
236 ksp_results%iter = iter
237 ksp_results%converged = this%is_converged(iter, rnorm)
238
239 end function cg_device_solve
240
242 function cg_device_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
243 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
244 class(cg_device_t), intent(inout) :: this
245 class(ax_t), intent(in) :: ax
246 type(field_t), intent(inout) :: x
247 type(field_t), intent(inout) :: y
248 type(field_t), intent(inout) :: z
249 integer, intent(in) :: n
250 real(kind=rp), dimension(n), intent(in) :: fx
251 real(kind=rp), dimension(n), intent(in) :: fy
252 real(kind=rp), dimension(n), intent(in) :: fz
253 type(coef_t), intent(inout) :: coef
254 type(bc_list_t), intent(inout) :: blstx
255 type(bc_list_t), intent(inout) :: blsty
256 type(bc_list_t), intent(inout) :: blstz
257 type(gs_t), intent(inout) :: gs_h
258 type(ksp_monitor_t), dimension(3) :: ksp_results
259 integer, optional, intent(in) :: niter
260
261 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
262 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
263 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
264
265 end function cg_device_solve_coupled
266
267end module cg_device
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Return the device pointer for an associated Fortran array.
Definition device.F90:96
Map a Fortran array to a device (allocate and associate)
Definition device.F90:72
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Defines various Conjugate Gradient methods for accelerators.
Definition cg_device.f90:34
type(ksp_monitor_t) function cg_device_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
Standard PCG solve.
subroutine cg_device_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a device based PCG solver.
Definition cg_device.f90:72
subroutine cg_device_free(this)
Deallocate a device based PCG solver.
type(ksp_monitor_t) function, dimension(3) cg_device_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard PCG coupled solve.
Coefficients.
Definition coef.f90:34
subroutine, public device_add2s1(a_d, b_d, c1, n, strm)
subroutine, public device_add2s2(a_d, b_d, c1, n, strm)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1309
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:214
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1274
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1244
Defines a field.
Definition field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition krylov.f90:51
Definition math.f90:60
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Krylov preconditioner.
Definition precon.f90:34
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:48
Device based preconditioned conjugate gradient method.
Definition cg_device.f90:51
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:73
Defines a canonical Krylov preconditioner.
Definition precon.f90:40