Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
ax_helm_device.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2022, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34 use ax_helm, only : ax_helm_t
35 use num_types, only : rp
36 use coefs, only : coef_t
37 use space, only : space_t
38 use mesh, only : mesh_t
39 use device_math, only : device_addcol4
40 use device, only : device_get_ptr
41 use num_types, only : rp
42 use, intrinsic :: iso_c_binding, only : c_ptr, c_int
43 implicit none
44 private
45
46 type, public, extends(ax_helm_t) :: ax_helm_device_t
47 contains
48 procedure, nopass :: compute => ax_helm_device_compute
49 procedure, pass(this) :: compute_vector => ax_helm_device_compute_vector
50 end type ax_helm_device_t
51
52#ifdef HAVE_HIP
53 interface
54 subroutine hip_ax_helm(w_d, u_d, &
55 dx_d, dy_d, dz_d, dxt_d, dyt_d, dzt_d, &
56 h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d, nelv, lx) &
57 bind(c, name='hip_ax_helm')
58 use, intrinsic :: iso_c_binding
59 type(c_ptr), value :: w_d, u_d
60 type(c_ptr), value :: dx_d, dy_d, dz_d
61 type(c_ptr), value :: dxt_d, dyt_d, dzt_d
62 type(c_ptr), value :: h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d
63 integer(c_int) :: nel, lx
64 end subroutine hip_ax_helm
65 end interface
66
67 interface
68 subroutine hip_ax_helm_vector(au_d, av_d, aw_d, u_d, v_d, w_d, &
69 dx_d, dy_d, dz_d, dxt_d, dyt_d, dzt_d,&
70 h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d, nelv, lx) &
71 bind(c, name='hip_ax_helm_vector')
72 use, intrinsic :: iso_c_binding
73 type(c_ptr), value :: au_d, av_d, aw_d
74 type(c_ptr), value :: u_d, v_d, w_d
75 type(c_ptr), value :: dx_d, dy_d, dz_d
76 type(c_ptr), value :: dxt_d, dyt_d, dzt_d
77 type(c_ptr), value :: h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d
78 integer(c_int) :: nel, lx
79 end subroutine hip_ax_helm_vector
80 end interface
81
82 interface
83 subroutine hip_ax_helm_vector_part2(au_d, av_d, aw_d, u_d, v_d, w_d, &
84 h2_d, B_d, n) bind(c, name='hip_ax_helm_vector_part2')
85 use, intrinsic :: iso_c_binding
86 type(c_ptr), value :: au_d, av_d, aw_d
87 type(c_ptr), value :: u_d, v_d, w_d
88 type(c_ptr), value :: h2_d, B_d
89 integer(c_int) :: n
90 end subroutine hip_ax_helm_vector_part2
91 end interface
92#elif HAVE_CUDA
93 interface
94 subroutine cuda_ax_helm(w_d, u_d, &
95 dx_d, dy_d, dz_d, dxt_d, dyt_d, dzt_d,&
96 h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d, nelv, lx) &
97 bind(c, name='cuda_ax_helm')
98 use, intrinsic :: iso_c_binding
99 type(c_ptr), value :: w_d, u_d
100 type(c_ptr), value :: dx_d, dy_d, dz_d
101 type(c_ptr), value :: dxt_d, dyt_d, dzt_d
102 type(c_ptr), value :: h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d
103 integer(c_int) :: nel, lx
104 end subroutine cuda_ax_helm
105 end interface
106
107 interface
108 subroutine cuda_ax_helm_vector(au_d, av_d, aw_d, u_d, v_d, w_d, &
109 dx_d, dy_d, dz_d, dxt_d, dyt_d, dzt_d,&
110 h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d, nelv, lx) &
111 bind(c, name='cuda_ax_helm_vector')
112 use, intrinsic :: iso_c_binding
113 type(c_ptr), value :: au_d, av_d, aw_d
114 type(c_ptr), value :: u_d, v_d, w_d
115 type(c_ptr), value :: dx_d, dy_d, dz_d
116 type(c_ptr), value :: dxt_d, dyt_d, dzt_d
117 type(c_ptr), value :: h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d
118 integer(c_int) :: nel, lx
119 end subroutine cuda_ax_helm_vector
120 end interface
121
122 interface
123 subroutine cuda_ax_helm_vector_part2(au_d, av_d, aw_d, u_d, v_d, w_d, &
124 h2_d, B_d, n) bind(c, name='cuda_ax_helm_vector_part2')
125 use, intrinsic :: iso_c_binding
126 type(c_ptr), value :: au_d, av_d, aw_d
127 type(c_ptr), value :: u_d, v_d, w_d
128 type(c_ptr), value :: h2_d, B_d
129 integer(c_int) :: n
130 end subroutine cuda_ax_helm_vector_part2
131 end interface
132#elif HAVE_OPENCL
133 interface
134 subroutine opencl_ax_helm(w_d, u_d, &
135 dx_d, dy_d, dz_d, dxt_d, dyt_d, dzt_d, &
136 h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d, nelv, lx) &
137 bind(c, name='opencl_ax_helm')
138 use, intrinsic :: iso_c_binding
139 type(c_ptr), value :: w_d, u_d
140 type(c_ptr), value :: dx_d, dy_d, dz_d
141 type(c_ptr), value :: dxt_d, dyt_d, dzt_d
142 type(c_ptr), value :: h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d
143 integer(c_int) :: nel, lx
144 end subroutine opencl_ax_helm
145 end interface
146#endif
147
148contains
149
150 subroutine ax_helm_device_compute(w, u, coef, msh, Xh)
151 type(mesh_t), intent(inout) :: msh
152 type(space_t), intent(inout) :: Xh
153 type(coef_t), intent(inout) :: coef
154 real(kind=rp), intent(inout) :: w(xh%lx, xh%ly, xh%lz, msh%nelv)
155 real(kind=rp), intent(inout) :: u(xh%lx, xh%ly, xh%lz, msh%nelv)
156 type(c_ptr) :: u_d, w_d
157
158 u_d = device_get_ptr(u)
159 w_d = device_get_ptr(w)
160
161#ifdef HAVE_HIP
162 call hip_ax_helm(w_d, u_d, xh%dx_d, xh%dy_d, xh%dz_d, &
163 xh%dxt_d, xh%dyt_d, xh%dzt_d, coef%h1_d, &
164 coef%G11_d, coef%G22_d, coef%G33_d, &
165 coef%G12_d, coef%G13_d, coef%G23_d, &
166 msh%nelv, xh%lx)
167#elif HAVE_CUDA
168 call cuda_ax_helm(w_d, u_d, xh%dx_d, xh%dy_d, xh%dz_d, &
169 xh%dxt_d, xh%dyt_d, xh%dzt_d, coef%h1_d, &
170 coef%G11_d, coef%G22_d, coef%G33_d, &
171 coef%G12_d, coef%G13_d, coef%G23_d, &
172 msh%nelv, xh%lx)
173#elif HAVE_OPENCL
174 call opencl_ax_helm(w_d, u_d, xh%dx_d, xh%dy_d, xh%dz_d, &
175 xh%dxt_d, xh%dyt_d, xh%dzt_d, coef%h1_d, &
176 coef%G11_d, coef%G22_d, coef%G33_d, &
177 coef%G12_d, coef%G13_d, coef%G23_d, &
178 msh%nelv, xh%lx)
179#endif
180
181 if (coef%ifh2) then
182 call device_addcol4(w_d ,coef%h2_d, coef%B_d, u_d, coef%dof%size())
183 end if
184
185 end subroutine ax_helm_device_compute
186
187 subroutine ax_helm_device_compute_vector(this, au, av, aw, &
188 u, v, w, coef, msh, Xh)
189 class(ax_helm_device_t), intent(in) :: this
190 type(space_t), intent(inout) :: Xh
191 type(mesh_t), intent(inout) :: msh
192 type(coef_t), intent(inout) :: coef
193 real(kind=rp), intent(inout) :: au(xh%lx, xh%ly, xh%lz, msh%nelv)
194 real(kind=rp), intent(inout) :: av(xh%lx, xh%ly, xh%lz, msh%nelv)
195 real(kind=rp), intent(inout) :: aw(xh%lx, xh%ly, xh%lz, msh%nelv)
196 real(kind=rp), intent(inout) :: u(xh%lx, xh%ly, xh%lz, msh%nelv)
197 real(kind=rp), intent(inout) :: v(xh%lx, xh%ly, xh%lz, msh%nelv)
198 real(kind=rp), intent(inout) :: w(xh%lx, xh%ly, xh%lz, msh%nelv)
199 type(c_ptr) :: u_d, v_d, w_d
200 type(c_ptr) :: au_d, av_d, aw_d
201
202 u_d = device_get_ptr(u)
203 v_d = device_get_ptr(v)
204 w_d = device_get_ptr(w)
205
206 au_d = device_get_ptr(au)
207 av_d = device_get_ptr(av)
208 aw_d = device_get_ptr(aw)
209
210#ifdef HAVE_HIP
211 call hip_ax_helm_vector(au_d, av_d, aw_d, u_d, v_d, w_d, &
212 xh%dx_d, xh%dy_d, xh%dz_d, xh%dxt_d, xh%dyt_d, xh%dzt_d, coef%h1_d, &
213 coef%G11_d, coef%G22_d, coef%G33_d, &
214 coef%G12_d, coef%G13_d, coef%G23_d, &
215 msh%nelv, xh%lx)
216#elif HAVE_CUDA
217 call cuda_ax_helm_vector(au_d, av_d, aw_d, u_d, v_d, w_d, &
218 xh%dx_d, xh%dy_d, xh%dz_d, xh%dxt_d, xh%dyt_d, xh%dzt_d, coef%h1_d, &
219 coef%G11_d, coef%G22_d, coef%G33_d, &
220 coef%G12_d, coef%G13_d, coef%G23_d, &
221 msh%nelv, xh%lx)
222#elif HAVE_OPENCL
223 call opencl_ax_helm(au_d, u_d, xh%dx_d, xh%dy_d, xh%dz_d, &
224 xh%dxt_d, xh%dyt_d, xh%dzt_d, coef%h1_d, &
225 coef%G11_d, coef%G22_d, coef%G33_d, &
226 coef%G12_d, coef%G13_d, coef%G23_d, &
227 msh%nelv, xh%lx)
228 call opencl_ax_helm(av_d, v_d, xh%dx_d, xh%dy_d, xh%dz_d, &
229 xh%dxt_d, xh%dyt_d, xh%dzt_d, coef%h1_d, &
230 coef%G11_d, coef%G22_d, coef%G33_d, &
231 coef%G12_d, coef%G13_d, coef%G23_d, &
232 msh%nelv, xh%lx)
233 call opencl_ax_helm(aw_d, w_d, xh%dx_d, xh%dy_d, xh%dz_d, &
234 xh%dxt_d, xh%dyt_d, xh%dzt_d, coef%h1_d, &
235 coef%G11_d, coef%G22_d, coef%G33_d, &
236 coef%G12_d, coef%G13_d, coef%G23_d, &
237 msh%nelv, xh%lx)
238#endif
239
240 if (coef%ifh2) then
241#ifdef HAVE_HIP
242 call hip_ax_helm_vector_part2(au_d, av_d, aw_d, u_d, v_d, w_d, &
243 coef%h2_d, coef%B_d, coef%dof%size())
244#elif HAVE_CUDA
245 call cuda_ax_helm_vector_part2(au_d, av_d, aw_d, u_d, v_d, w_d, &
246 coef%h2_d, coef%B_d, coef%dof%size())
247#else
248 call device_addcol4(au_d ,coef%h2_d, coef%B_d, u_d, coef%dof%size())
249 call device_addcol4(av_d ,coef%h2_d, coef%B_d, v_d, coef%dof%size())
250 call device_addcol4(aw_d ,coef%h2_d, coef%B_d, w_d, coef%dof%size())
251#endif
252 end if
253
254 end subroutine ax_helm_device_compute_vector
255
256end module ax_helm_device
void opencl_ax_helm(void *w, void *u, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *g11, void *g22, void *g33, void *g12, void *g13, void *g23, int *nelv, int *lx)
Definition ax_helm.c:52
void cuda_ax_helm_vector_part2(void *au, void *av, void *aw, void *u, void *v, void *w, void *h2, void *B, int *n)
Definition ax_helm.cu:254
void cuda_ax_helm(void *w, void *u, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *g11, void *g22, void *g33, void *g12, void *g13, void *g23, int *nelv, int *lx)
Definition ax_helm.cu:63
void cuda_ax_helm_vector(void *au, void *av, void *aw, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *dxt, void *dyt, void *dzt, void *h1, void *g11, void *g22, void *g33, void *g12, void *g13, void *g23, int *nelv, int *lx)
Definition ax_helm.cu:184
Return the device pointer for an associated Fortran array.
Definition device.F90:81
subroutine ax_helm_device_compute(w, u, coef, msh, xh)
subroutine ax_helm_device_compute_vector(this, au, av, aw, u, v, w, coef, msh, xh)
Coefficients.
Definition coef.f90:34
subroutine, public device_addcol4(a_d, b_d, c_d, d_d, n)
Returns .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
Defines a mesh.
Definition mesh.f90:34
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a function space.
Definition space.f90:34
Matrix-vector product for a Helmholtz problem.
Definition ax_helm.f90:44
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
The function space for the SEM solution fields.
Definition space.f90:62