Loading [MathJax]/extensions/tex2jax.js
Neko 0.9.99
A portable framework for high-order spectral element flow simulations
All Classes Namespaces Files Functions Variables Typedefs Enumerator Macros Pages
cg_device.f90
Go to the documentation of this file.
1! Copyright (c) 2021-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use num_types, only: rp
37 use precon, only : pc_t
38 use ax_product, only : ax_t
39 use field, only : field_t
40 use coefs, only : coef_t
41 use gather_scatter, only : gs_t, gs_op_add
42 use bc_list, only : bc_list_t
43 use math, only : abscmp
44 use device
47 use, intrinsic :: iso_c_binding, only : c_ptr, c_null_ptr, c_associated
48 implicit none
49
51 type, public, extends(ksp_t) :: cg_device_t
52 real(kind=rp), allocatable :: w(:)
53 real(kind=rp), allocatable :: r(:)
54 real(kind=rp), allocatable :: p(:)
55 real(kind=rp), allocatable :: z(:)
56 type(c_ptr) :: w_d = c_null_ptr
57 type(c_ptr) :: r_d = c_null_ptr
58 type(c_ptr) :: p_d = c_null_ptr
59 type(c_ptr) :: z_d = c_null_ptr
60 type(c_ptr) :: gs_event = c_null_ptr
61 contains
62 procedure, pass(this) :: init => cg_device_init
63 procedure, pass(this) :: free => cg_device_free
64 procedure, pass(this) :: solve => cg_device_solve
65 procedure, pass(this) :: solve_coupled => cg_device_solve_coupled
66 end type cg_device_t
67
68contains
69
71 subroutine cg_device_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
72 class(cg_device_t), intent(inout) :: this
73 class(pc_t), optional, intent(in), target :: M
74 integer, intent(in) :: n
75 integer, intent(in) :: max_iter
76 real(kind=rp), optional, intent(in) :: rel_tol
77 real(kind=rp), optional, intent(in) :: abs_tol
78 logical, optional, intent(in) :: monitor
79
80 call this%free()
81
82 allocate(this%w(n))
83 allocate(this%r(n))
84 allocate(this%p(n))
85 allocate(this%z(n))
86
87 call device_map(this%z, this%z_d, n)
88 call device_map(this%p, this%p_d, n)
89 call device_map(this%r, this%r_d, n)
90 call device_map(this%w, this%w_d, n)
91
92 if (present(m)) then
93 this%M => m
94 end if
95
96 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
97 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
98 else if (present(rel_tol) .and. present(abs_tol)) then
99 call this%ksp_init(max_iter, rel_tol, abs_tol)
100 else if (present(monitor) .and. present(abs_tol)) then
101 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
102 else if (present(rel_tol) .and. present(monitor)) then
103 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
104 else if (present(rel_tol)) then
105 call this%ksp_init(max_iter, rel_tol = rel_tol)
106 else if (present(abs_tol)) then
107 call this%ksp_init(max_iter, abs_tol = abs_tol)
108 else if (present(monitor)) then
109 call this%ksp_init(max_iter, monitor = monitor)
110 else
111 call this%ksp_init(max_iter)
112 end if
113
114 call device_event_create(this%gs_event, 2)
115 end subroutine cg_device_init
116
118 subroutine cg_device_free(this)
119 class(cg_device_t), intent(inout) :: this
120
121 call this%ksp_free()
122
123 if (allocated(this%w)) then
124 deallocate(this%w)
125 end if
126
127 if (allocated(this%r)) then
128 deallocate(this%r)
129 end if
130
131 if (allocated(this%p)) then
132 deallocate(this%p)
133 end if
134
135 if (allocated(this%z)) then
136 deallocate(this%z)
137 end if
138
139 nullify(this%M)
140
141 if (c_associated(this%w_d)) then
142 call device_free(this%w_d)
143 end if
144
145 if (c_associated(this%r_d)) then
146 call device_free(this%r_d)
147 end if
148
149 if (c_associated(this%p_d)) then
150 call device_free(this%p_d)
151 end if
152
153 if (c_associated(this%z_d)) then
154 call device_free(this%z_d)
155 end if
156
157 if (c_associated(this%gs_event)) then
158 call device_event_destroy(this%gs_event)
159 end if
160
161 end subroutine cg_device_free
162
164 function cg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
165 class(cg_device_t), intent(inout) :: this
166 class(ax_t), intent(in) :: ax
167 type(field_t), intent(inout) :: x
168 integer, intent(in) :: n
169 real(kind=rp), dimension(n), intent(in) :: f
170 type(coef_t), intent(inout) :: coef
171 type(bc_list_t), intent(inout) :: blst
172 type(gs_t), intent(inout) :: gs_h
173 type(ksp_monitor_t) :: ksp_results
174 integer, optional, intent(in) :: niter
175 real(kind=rp), parameter :: one = 1.0
176 real(kind=rp), parameter :: zero = 0.0
177 integer :: iter, max_iter
178 real(kind=rp) :: rnorm, rtr, rtr0, rtz2, rtz1
179 real(kind=rp) :: beta, pap, alpha, alphm, norm_fac
180 type(c_ptr) :: f_d
181
182 f_d = device_get_ptr(f)
183
184 if (present(niter)) then
185 max_iter = niter
186 else
187 max_iter = this%max_iter
188 end if
189 norm_fac = one/sqrt(coef%volume)
190
191 rtz1 = one
192 call device_rzero(x%x_d, n)
193 call device_rzero(this%p_d, n)
194 call device_copy(this%r_d, f_d, n)
195
196 rtr = device_glsc3(this%r_d, coef%mult_d, this%r_d, n)
197 rnorm = sqrt(rtr)*norm_fac
198 ksp_results%res_start = rnorm
199 ksp_results%res_final = rnorm
200 ksp_results%iter = 0
201 if(abscmp(rnorm, zero)) return
202 call this%monitor_start('CG')
203 do iter = 1, max_iter
204 call this%M%solve(this%z, this%r, n)
205 rtz2 = rtz1
206 rtz1 = device_glsc3(this%r_d, coef%mult_d, this%z_d, n)
207 beta = rtz1 / rtz2
208 if (iter .eq. 1) beta = zero
209 call device_add2s1(this%p_d, this%z_d, beta, n)
210
211 call ax%compute(this%w, this%p, coef, x%msh, x%Xh)
212 call gs_h%op(this%w, n, gs_op_add, this%gs_event)
213 call device_event_sync(this%gs_event)
214 call blst%apply(this%w, n)
215
216 pap = device_glsc3(this%w_d, coef%mult_d, this%p_d, n)
217
218 alpha = rtz1 / pap
219 alphm = -alpha
220 call device_add2s2(x%x_d, this%p_d, alpha, n)
221 call device_add2s2(this%r_d, this%w_d, alphm, n)
222
223 rtr = device_glsc3(this%r_d, coef%mult_d, this%r_d, n)
224 if (iter .eq. 1) rtr0 = rtr
225 rnorm = sqrt(rtr)*norm_fac
226 call this%monitor_iter(iter, rnorm)
227 if (rnorm .lt. this%abs_tol) then
228 exit
229 end if
230 end do
231 call this%monitor_stop()
232 ksp_results%res_final = rnorm
233 ksp_results%iter = iter
234 ksp_results%converged = this%is_converged(iter, rnorm)
235
236 end function cg_device_solve
237
239 function cg_device_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
240 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
241 class(cg_device_t), intent(inout) :: this
242 class(ax_t), intent(in) :: ax
243 type(field_t), intent(inout) :: x
244 type(field_t), intent(inout) :: y
245 type(field_t), intent(inout) :: z
246 integer, intent(in) :: n
247 real(kind=rp), dimension(n), intent(in) :: fx
248 real(kind=rp), dimension(n), intent(in) :: fy
249 real(kind=rp), dimension(n), intent(in) :: fz
250 type(coef_t), intent(inout) :: coef
251 type(bc_list_t), intent(inout) :: blstx
252 type(bc_list_t), intent(inout) :: blsty
253 type(bc_list_t), intent(inout) :: blstz
254 type(gs_t), intent(inout) :: gs_h
255 type(ksp_monitor_t), dimension(3) :: ksp_results
256 integer, optional, intent(in) :: niter
257
258 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
259 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
260 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
261
262 end function cg_device_solve_coupled
263
264end module cg_device
Return the device pointer for an associated Fortran array.
Definition device.F90:95
Map a Fortran array to a device (allocate and associate)
Definition device.F90:71
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Defines various Conjugate Gradient methods for accelerators.
Definition cg_device.f90:34
type(ksp_monitor_t) function cg_device_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
Standard PCG solve.
subroutine cg_device_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a device based PCG solver.
Definition cg_device.f90:72
subroutine cg_device_free(this)
Deallocate a device based PCG solver.
type(ksp_monitor_t) function, dimension(3) cg_device_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard PCG coupled solve.
Coefficients.
Definition coef.f90:34
subroutine, public device_add2s1(a_d, b_d, c1, n)
subroutine, public device_rzero(a_d, n)
Zero a real vector.
subroutine, public device_add2s2(a_d, b_d, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1244
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:200
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1209
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1179
Defines a field.
Definition field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition krylov.f90:51
Definition math.f90:60
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Krylov preconditioner.
Definition precon.f90:34
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:47
Device based preconditioned conjugate gradient method.
Definition cg_device.f90:51
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:68
Defines a canonical Krylov preconditioner.
Definition precon.f90:40