Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
cg_device.f90
Go to the documentation of this file.
1! Copyright (c) 2021-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use num_types, only: rp
37 use precon, only : pc_t
38 use ax_product, only : ax_t
39 use field, only : field_t
40 use coefs, only : coef_t
41 use gather_scatter, only : gs_t, gs_op_add
42 use bc_list, only : bc_list_t
43 use math, only : abscmp
44 use device
47 implicit none
48
50 type, public, extends(ksp_t) :: cg_device_t
51 real(kind=rp), allocatable :: w(:)
52 real(kind=rp), allocatable :: r(:)
53 real(kind=rp), allocatable :: p(:)
54 real(kind=rp), allocatable :: z(:)
55 type(c_ptr) :: w_d = c_null_ptr
56 type(c_ptr) :: r_d = c_null_ptr
57 type(c_ptr) :: p_d = c_null_ptr
58 type(c_ptr) :: z_d = c_null_ptr
59 type(c_ptr) :: gs_event = c_null_ptr
60 contains
61 procedure, pass(this) :: init => cg_device_init
62 procedure, pass(this) :: free => cg_device_free
63 procedure, pass(this) :: solve => cg_device_solve
64 procedure, pass(this) :: solve_coupled => cg_device_solve_coupled
65 end type cg_device_t
66
67contains
68
70 subroutine cg_device_init(this, n, max_iter, M, rel_tol, abs_tol, monitor)
71 class(cg_device_t), intent(inout) :: this
72 class(pc_t), optional, intent(in), target :: M
73 integer, intent(in) :: n
74 integer, intent(in) :: max_iter
75 real(kind=rp), optional, intent(in) :: rel_tol
76 real(kind=rp), optional, intent(in) :: abs_tol
77 logical, optional, intent(in) :: monitor
78
79 call this%free()
80
81 allocate(this%w(n))
82 allocate(this%r(n))
83 allocate(this%p(n))
84 allocate(this%z(n))
85
86 call device_map(this%z, this%z_d, n)
87 call device_map(this%p, this%p_d, n)
88 call device_map(this%r, this%r_d, n)
89 call device_map(this%w, this%w_d, n)
90
91 if (present(m)) then
92 this%M => m
93 end if
94
95 if (present(rel_tol) .and. present(abs_tol) .and. present(monitor)) then
96 call this%ksp_init(max_iter, rel_tol, abs_tol, monitor = monitor)
97 else if (present(rel_tol) .and. present(abs_tol)) then
98 call this%ksp_init(max_iter, rel_tol, abs_tol)
99 else if (present(monitor) .and. present(abs_tol)) then
100 call this%ksp_init(max_iter, abs_tol = abs_tol, monitor = monitor)
101 else if (present(rel_tol) .and. present(monitor)) then
102 call this%ksp_init(max_iter, rel_tol, monitor = monitor)
103 else if (present(rel_tol)) then
104 call this%ksp_init(max_iter, rel_tol = rel_tol)
105 else if (present(abs_tol)) then
106 call this%ksp_init(max_iter, abs_tol = abs_tol)
107 else if (present(monitor)) then
108 call this%ksp_init(max_iter, monitor = monitor)
109 else
110 call this%ksp_init(max_iter)
111 end if
112
113 call device_event_create(this%gs_event, 2)
114 end subroutine cg_device_init
115
117 subroutine cg_device_free(this)
118 class(cg_device_t), intent(inout) :: this
119
120 call this%ksp_free()
121
122 if (allocated(this%w)) then
123 deallocate(this%w)
124 end if
125
126 if (allocated(this%r)) then
127 deallocate(this%r)
128 end if
129
130 if (allocated(this%p)) then
131 deallocate(this%p)
132 end if
133
134 if (allocated(this%z)) then
135 deallocate(this%z)
136 end if
137
138 nullify(this%M)
139
140 if (c_associated(this%w_d)) then
141 call device_free(this%w_d)
142 end if
143
144 if (c_associated(this%r_d)) then
145 call device_free(this%r_d)
146 end if
147
148 if (c_associated(this%p_d)) then
149 call device_free(this%p_d)
150 end if
151
152 if (c_associated(this%z_d)) then
153 call device_free(this%z_d)
154 end if
155
156 if (c_associated(this%gs_event)) then
157 call device_event_destroy(this%gs_event)
158 end if
159
160 end subroutine cg_device_free
161
163 function cg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
164 class(cg_device_t), intent(inout) :: this
165 class(ax_t), intent(in) :: ax
166 type(field_t), intent(inout) :: x
167 integer, intent(in) :: n
168 real(kind=rp), dimension(n), intent(in) :: f
169 type(coef_t), intent(inout) :: coef
170 type(bc_list_t), intent(inout) :: blst
171 type(gs_t), intent(inout) :: gs_h
172 type(ksp_monitor_t) :: ksp_results
173 integer, optional, intent(in) :: niter
174 real(kind=rp), parameter :: one = 1.0
175 real(kind=rp), parameter :: zero = 0.0
176 integer :: iter, max_iter
177 real(kind=rp) :: rnorm, rtr, rtr0, rtz2, rtz1
178 real(kind=rp) :: beta, pap, alpha, alphm, norm_fac
179 type(c_ptr) :: f_d
180
181 f_d = device_get_ptr(f)
182
183 if (present(niter)) then
184 max_iter = niter
185 else
186 max_iter = this%max_iter
187 end if
188 norm_fac = one/sqrt(coef%volume)
189
190 rtz1 = one
191 call device_rzero(x%x_d, n)
192 call device_rzero(this%p_d, n)
193 call device_copy(this%r_d, f_d, n)
194
195 rtr = device_glsc3(this%r_d, coef%mult_d, this%r_d, n)
196 rnorm = sqrt(rtr)*norm_fac
197 ksp_results%res_start = rnorm
198 ksp_results%res_final = rnorm
199 ksp_results%iter = 0
200 if(abscmp(rnorm, zero)) return
201 call this%monitor_start('CG')
202 do iter = 1, max_iter
203 call this%M%solve(this%z, this%r, n)
204 rtz2 = rtz1
205 rtz1 = device_glsc3(this%r_d, coef%mult_d, this%z_d, n)
206 beta = rtz1 / rtz2
207 if (iter .eq. 1) beta = zero
208 call device_add2s1(this%p_d, this%z_d, beta, n)
209
210 call ax%compute(this%w, this%p, coef, x%msh, x%Xh)
211 call gs_h%op(this%w, n, gs_op_add, this%gs_event)
212 call device_event_sync(this%gs_event)
213 call blst%apply(this%w, n)
214
215 pap = device_glsc3(this%w_d, coef%mult_d, this%p_d, n)
216
217 alpha = rtz1 / pap
218 alphm = -alpha
219 call device_add2s2(x%x_d, this%p_d, alpha, n)
220 call device_add2s2(this%r_d, this%w_d, alphm, n)
221
222 rtr = device_glsc3(this%r_d, coef%mult_d, this%r_d, n)
223 if (iter .eq. 1) rtr0 = rtr
224 rnorm = sqrt(rtr)*norm_fac
225 call this%monitor_iter(iter, rnorm)
226 if (rnorm .lt. this%abs_tol) then
227 exit
228 end if
229 end do
230 call this%monitor_stop()
231 ksp_results%res_final = rnorm
232 ksp_results%iter = iter
233 ksp_results%converged = this%is_converged(iter, rnorm)
234
235 end function cg_device_solve
236
238 function cg_device_solve_coupled(this, Ax, x, y, z, fx, fy, fz, &
239 n, coef, blstx, blsty, blstz, gs_h, niter) result(ksp_results)
240 class(cg_device_t), intent(inout) :: this
241 class(ax_t), intent(in) :: ax
242 type(field_t), intent(inout) :: x
243 type(field_t), intent(inout) :: y
244 type(field_t), intent(inout) :: z
245 integer, intent(in) :: n
246 real(kind=rp), dimension(n), intent(in) :: fx
247 real(kind=rp), dimension(n), intent(in) :: fy
248 real(kind=rp), dimension(n), intent(in) :: fz
249 type(coef_t), intent(inout) :: coef
250 type(bc_list_t), intent(inout) :: blstx
251 type(bc_list_t), intent(inout) :: blsty
252 type(bc_list_t), intent(inout) :: blstz
253 type(gs_t), intent(inout) :: gs_h
254 type(ksp_monitor_t), dimension(3) :: ksp_results
255 integer, optional, intent(in) :: niter
256
257 ksp_results(1) = this%solve(ax, x, fx, n, coef, blstx, gs_h, niter)
258 ksp_results(2) = this%solve(ax, y, fy, n, coef, blsty, gs_h, niter)
259 ksp_results(3) = this%solve(ax, z, fz, n, coef, blstz, gs_h, niter)
260
261 end function cg_device_solve_coupled
262
263end module cg_device
264
265
Return the device pointer for an associated Fortran array.
Definition device.F90:81
Map a Fortran array to a device (allocate and associate)
Definition device.F90:57
Defines a Matrix-vector product.
Definition ax.f90:34
Defines a list of bc_t.
Definition bc_list.f90:34
Defines various Conjugate Gradient methods for accelerators.
Definition cg_device.f90:34
type(ksp_monitor_t) function cg_device_solve(this, ax, x, f, n, coef, blst, gs_h, niter)
Standard PCG solve.
subroutine cg_device_init(this, n, max_iter, m, rel_tol, abs_tol, monitor)
Initialise a device based PCG solver.
Definition cg_device.f90:71
subroutine cg_device_free(this)
Deallocate a device based PCG solver.
type(ksp_monitor_t) function, dimension(3) cg_device_solve_coupled(this, ax, x, y, z, fx, fy, fz, n, coef, blstx, blsty, blstz, gs_h, niter)
Standard PCG coupled solve.
Coefficients.
Definition coef.f90:34
subroutine, public device_add2s1(a_d, b_d, c1, n)
subroutine, public device_rzero(a_d, n)
Zero a real vector.
subroutine, public device_add2s2(a_d, b_d, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1229
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:185
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1194
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1164
Defines a field.
Definition field.f90:34
Gather-scatter.
Implements the base abstract type for Krylov solvers plus helper types.
Definition krylov.f90:34
integer, parameter, public ksp_max_iter
Maximum number of iters.
Definition krylov.f90:51
Definition math.f90:60
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Krylov preconditioner.
Definition precon.f90:34
Base type for a matrix-vector product providing .
Definition ax.f90:43
A list of allocatable `bc_t`. Follows the standard interface of lists.
Definition bc_list.f90:46
Device based preconditioned conjugate gradient method.
Definition cg_device.f90:50
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
Type for storing initial and final residuals in a Krylov solver.
Definition krylov.f90:56
Base abstract type for a canonical Krylov method, solving .
Definition krylov.f90:68
Defines a canonical Krylov preconditioner.
Definition precon.f90:40