Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
opr_device.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2022, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use gather_scatter, only : gs_op_add
36 use num_types, only : rp, c_rp
37 use device, only : device_get_ptr
38 use space, only : space_t
39 use coefs, only : coef_t
40 use field, only : field_t
41 use utils, only : neko_error
44 use comm
45 use, intrinsic :: iso_c_binding
46 implicit none
47 private
48
51
52#ifdef HAVE_HIP
53 interface
54 subroutine hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
55 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
56 bind(c, name = 'hip_dudxyz')
57 use, intrinsic :: iso_c_binding
58 type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
59 type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
60 integer(c_int) :: nel, lx
61 end subroutine hip_dudxyz
62 end interface
63
64 interface
65 subroutine hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
66 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
67 bind(c, name = 'hip_cdtp')
68 use, intrinsic :: iso_c_binding
69 type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
70 type(c_ptr), value :: dxt_d, dyt_d, dzt_d, w3_d
71 integer(c_int) :: nel, lx
72 end subroutine hip_cdtp
73 end interface
74
75 interface
76 subroutine hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
77 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
78 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
79 jacinv_d, nel, gdim, lx) &
80 bind(c, name = 'hip_conv1')
81 use, intrinsic :: iso_c_binding
82 type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
83 type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
84 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
85 type(c_ptr), value :: jacinv_d
86 integer(c_int) :: nel, gdim, lx
87 end subroutine hip_conv1
88 end interface
89
90 interface
91 subroutine hip_opgrad(ux_d, uy_d, uz_d, u_d, &
92 dx_d, dy_d, dz_d, &
93 drdx_d, dsdx_d, dtdx_d, &
94 drdy_d, dsdy_d, dtdy_d, &
95 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
96 bind(c, name = 'hip_opgrad')
97 use, intrinsic :: iso_c_binding
98 type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
99 type(c_ptr), value :: dx_d, dy_d, dz_d
100 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
101 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
102 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
103 type(c_ptr), value :: w3_d
104 integer(c_int) :: nel, lx
105 end subroutine hip_opgrad
106 end interface
107
108 interface
109 subroutine hip_lambda2(lambda2_d, u_d, v_d, w_d, &
110 dx_d, dy_d, dz_d, &
111 drdx_d, dsdx_d, dtdx_d, &
112 drdy_d, dsdy_d, dtdy_d, &
113 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
114 bind(c, name = 'hip_lambda2')
115 use, intrinsic :: iso_c_binding
116 type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
117 type(c_ptr), value :: dx_d, dy_d, dz_d
118 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
119 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
120 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
121 type(c_ptr), value :: jacinv_d
122 integer(c_int) :: nel, lx
123 end subroutine hip_lambda2
124 end interface
125
126 interface
127 real(c_rp) function hip_cfl(dt, u_d, v_d, w_d, &
128 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
129 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
130 jacinv_d, nel, lx) &
131 bind(c, name = 'hip_cfl')
132 use, intrinsic :: iso_c_binding
133 import c_rp
134 type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
135 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
136 type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
137 real(c_rp) :: dt
138 integer(c_int) :: nel, lx
139 end function hip_cfl
140 end interface
141#elif HAVE_CUDA
142 interface
143 subroutine cuda_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
144 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
145 bind(c, name = 'cuda_dudxyz')
146 use, intrinsic :: iso_c_binding
147 type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
148 type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
149 integer(c_int) :: nel, lx
150 end subroutine cuda_dudxyz
151 end interface
152
153 interface
154 subroutine cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
155 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
156 bind(c, name = 'cuda_cdtp')
157 use, intrinsic :: iso_c_binding
158 type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
159 type(c_ptr), value :: dxt_d, dyt_d, dzt_d, w3_d
160 integer(c_int) :: nel, lx
161 end subroutine cuda_cdtp
162 end interface
163
164 interface
165 subroutine cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
166 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
167 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
168 jacinv_d, nel, gdim, lx) &
169 bind(c, name = 'cuda_conv1')
170 use, intrinsic :: iso_c_binding
171 type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
172 type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
173 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
174 type(c_ptr), value :: jacinv_d
175 integer(c_int) :: nel, gdim, lx
176 end subroutine cuda_conv1
177 end interface
178
179 interface
180 subroutine cuda_opgrad(ux_d, uy_d, uz_d, u_d, &
181 dx_d, dy_d, dz_d, &
182 drdx_d, dsdx_d, dtdx_d, &
183 drdy_d, dsdy_d, dtdy_d, &
184 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
185 bind(c, name = 'cuda_opgrad')
186 use, intrinsic :: iso_c_binding
187 type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
188 type(c_ptr), value :: dx_d, dy_d, dz_d
189 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
190 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
191 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
192 type(c_ptr), value :: w3_d
193 integer(c_int) :: nel, lx
194 end subroutine cuda_opgrad
195 end interface
196
197 interface
198 subroutine cuda_lambda2(lambda2_d, u_d, v_d, w_d, &
199 dx_d, dy_d, dz_d, &
200 drdx_d, dsdx_d, dtdx_d, &
201 drdy_d, dsdy_d, dtdy_d, &
202 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
203 bind(c, name = 'cuda_lambda2')
204 use, intrinsic :: iso_c_binding
205 type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
206 type(c_ptr), value :: dx_d, dy_d, dz_d
207 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
208 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
209 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
210 type(c_ptr), value :: jacinv_d
211 integer(c_int) :: nel, lx
212 end subroutine cuda_lambda2
213 end interface
214
215
216 interface
217 real(c_rp) function cuda_cfl(dt, u_d, v_d, w_d, &
218 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
219 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
220 jacinv_d, nel, lx) &
221 bind(c, name = 'cuda_cfl')
222 use, intrinsic :: iso_c_binding
223 import c_rp
224 type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
225 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
226 type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
227 real(c_rp) :: dt
228 integer(c_int) :: nel, lx
229 end function cuda_cfl
230 end interface
231#elif HAVE_OPENCL
232 interface
233 subroutine opencl_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
234 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
235 bind(c, name = 'opencl_dudxyz')
236 use, intrinsic :: iso_c_binding
237 type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
238 type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
239 integer(c_int) :: nel, lx
240 end subroutine opencl_dudxyz
241 end interface
242
243 interface
244 subroutine opencl_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
245 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
246 bind(c, name = 'opencl_cdtp')
247 use, intrinsic :: iso_c_binding
248 type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
249 type(c_ptr), value :: dxt_d, dyt_d, dzt_d, w3_d
250 integer(c_int) :: nel, lx
251 end subroutine opencl_cdtp
252 end interface
253
254 interface
255 subroutine opencl_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
256 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
257 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
258 jacinv_d, nel, gdim, lx) &
259 bind(c, name = 'opencl_conv1')
260 use, intrinsic :: iso_c_binding
261 type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
262 type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
263 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
264 type(c_ptr), value :: jacinv_d
265 integer(c_int) :: nel, gdim, lx
266 end subroutine opencl_conv1
267 end interface
268
269 interface
270 subroutine opencl_opgrad(ux_d, uy_d, uz_d, u_d, &
271 dx_d, dy_d, dz_d, &
272 drdx_d, dsdx_d, dtdx_d, &
273 drdy_d, dsdy_d, dtdy_d, &
274 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
275 bind(c, name = 'opencl_opgrad')
276 use, intrinsic :: iso_c_binding
277 type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
278 type(c_ptr), value :: dx_d, dy_d, dz_d
279 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
280 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
281 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
282 type(c_ptr), value :: w3_d
283 integer(c_int) :: nel, lx
284 end subroutine opencl_opgrad
285 end interface
286
287 interface
288 real(c_rp) function opencl_cfl(dt, u_d, v_d, w_d, &
289 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
290 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
291 jacinv_d, nel, lx) &
292 bind(c, name = 'opencl_cfl')
293 use, intrinsic :: iso_c_binding
294 import c_rp
295 type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
296 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
297 type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
298 real(c_rp) :: dt
299 integer(c_int) :: nel, lx
300 end function opencl_cfl
301 end interface
302
303 interface
304 subroutine opencl_lambda2(lambda2_d, u_d, v_d, w_d, &
305 dx_d, dy_d, dz_d, &
306 drdx_d, dsdx_d, dtdx_d, &
307 drdy_d, dsdy_d, dtdy_d, &
308 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
309 bind(c, name = 'opencl_lambda2')
310 use, intrinsic :: iso_c_binding
311 type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
312 type(c_ptr), value :: dx_d, dy_d, dz_d
313 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
314 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
315 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
316 type(c_ptr), value :: jacinv_d
317 integer(c_int) :: nel, lx
318 end subroutine opencl_lambda2
319 end interface
320#endif
321
322contains
323
324 subroutine opr_device_dudxyz(du, u, dr, ds, dt, coef)
325 type(coef_t), intent(in), target :: coef
326 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, & coef%Xh%lz, coef%msh%nelv), intent(inout) :: du
327 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, & coef%Xh%lz, coef%msh%nelv), intent(in) :: u, dr, ds, dt
328 type(c_ptr) :: du_d, u_d, dr_d, ds_d, dt_d
329
330 du_d = device_get_ptr(du)
331 u_d = device_get_ptr(u)
332
333 dr_d = device_get_ptr(dr)
334 ds_d = device_get_ptr(ds)
335 dt_d = device_get_ptr(dt)
336
337 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
338#ifdef HAVE_HIP
339 call hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
340 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
341 msh%nelv, xh%lx)
342#elif HAVE_CUDA
343 call cuda_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
344 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
345 msh%nelv, xh%lx)
346#elif HAVE_OPENCL
347 call opencl_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
348 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
349 msh%nelv, xh%lx)
350#else
351 call neko_error('No device backend configured')
352#endif
353 end associate
354
355 end subroutine opr_device_dudxyz
356
357 subroutine opr_device_opgrad(ux, uy, uz, u, coef)
358 type(coef_t), intent(in) :: coef
359 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: ux
360 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uy
361 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uz
362 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: u
363 type(c_ptr) :: ux_d, uy_d, uz_d, u_d
364
365 ux_d = device_get_ptr(ux)
366 uy_d = device_get_ptr(uy)
367 uz_d = device_get_ptr(uz)
368
369 u_d = device_get_ptr(u)
370
371 associate(xh => coef%Xh, msh => coef%msh)
372#ifdef HAVE_HIP
373 call hip_opgrad(ux_d, uy_d, uz_d, u_d, &
374 xh%dx_d, xh%dy_d, xh%dz_d, &
375 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
376 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
377 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
378 xh%w3_d, msh%nelv, xh%lx)
379#elif HAVE_CUDA
380 call cuda_opgrad(ux_d, uy_d, uz_d, u_d, &
381 xh%dx_d, xh%dy_d, xh%dz_d, &
382 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
383 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
384 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
385 xh%w3_d, msh%nelv, xh%lx)
386#elif HAVE_OPENCL
387 call opencl_opgrad(ux_d, uy_d, uz_d, u_d, &
388 xh%dx_d, xh%dy_d, xh%dz_d, &
389 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
390 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
391 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
392 xh%w3_d, msh%nelv, xh%lx)
393#else
394 call neko_error('No device backend configured')
395#endif
396 end associate
397
398 end subroutine opr_device_opgrad
399 subroutine opr_device_lambda2(lambda2, u, v, w, coef)
400 type(coef_t), intent(in) :: coef
401 type(field_t), intent(inout) :: lambda2
402 type(field_t), intent(in) :: u, v, w
403#ifdef HAVE_HIP
404 call hip_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
405 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
406 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
407 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
408 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
409 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
410#elif HAVE_CUDA
411 call cuda_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
412 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
413 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
414 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
415 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
416 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
417#elif HAVE_OPENCL
418 call opencl_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
419 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
420 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
421 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
422 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
423 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
424#else
425 call neko_error('No device backend configured')
426#endif
427 end subroutine opr_device_lambda2
428
429 subroutine opr_device_cdtp(dtx, x, dr, ds, dt, coef)
430 type(coef_t), intent(in) :: coef
431 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: dtx
432 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: x
433 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dr
434 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: ds
435 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dt
436 type(c_ptr) :: dtx_d, x_d, dr_d, ds_d, dt_d
437
438 dtx_d = device_get_ptr(dtx)
439 x_d = device_get_ptr(x)
440
441 dr_d = device_get_ptr(dr)
442 ds_d = device_get_ptr(ds)
443 dt_d = device_get_ptr(dt)
444
445 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
446#ifdef HAVE_HIP
447 call hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
448 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
449 msh%nelv, xh%lx)
450#elif HAVE_CUDA
451 call cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
452 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
453 msh%nelv, xh%lx)
454#elif HAVE_OPENCL
455 call opencl_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
456 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
457 msh%nelv, xh%lx)
458#else
459 call neko_error('No device backend configured')
460#endif
461 end associate
462
463 end subroutine opr_device_cdtp
464
465 subroutine opr_device_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim)
466 type(space_t), intent(in) :: xh
467 type(coef_t), intent(in) :: coef
468 integer, intent(in) :: nelv, gdim
469 real(kind=rp), intent(inout) :: du(xh%lxyz, nelv)
470 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u
471 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vx
472 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vy
473 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vz
474 type(c_ptr) :: du_d, u_d, vx_d, vy_d, vz_d
475
476 du_d = device_get_ptr(du)
477 u_d = device_get_ptr(u)
478
479 vx_d = device_get_ptr(vx)
480 vy_d = device_get_ptr(vy)
481 vz_d = device_get_ptr(vz)
482
483 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
484#ifdef HAVE_HIP
485 call hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
486 xh%dx_d, xh%dy_d, xh%dz_d, &
487 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
488 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
489 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
490 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
491#elif HAVE_CUDA
492 call cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
493 xh%dx_d, xh%dy_d, xh%dz_d, &
494 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
495 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
496 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
497 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
498#elif HAVE_OPENCL
499 call opencl_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
500 xh%dx_d, xh%dy_d, xh%dz_d, &
501 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
502 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
503 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
504 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
505#else
506 call neko_error('No device backend configured')
507#endif
508 end associate
509
510 end subroutine opr_device_conv1
511
512 subroutine opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
513 type(field_t), intent(inout) :: w1
514 type(field_t), intent(inout) :: w2
515 type(field_t), intent(inout) :: w3
516 type(field_t), intent(in) :: u1
517 type(field_t), intent(in) :: u2
518 type(field_t), intent(in) :: u3
519 type(field_t), intent(inout) :: work1
520 type(field_t), intent(inout) :: work2
521 type(coef_t), intent(in) :: c_xh
522 integer :: gdim, n, nelv
523
524 n = w1%dof%size()
525 gdim = c_xh%msh%gdim
526 nelv = c_xh%msh%nelv
527
528 ! this%work1=dw/dy ; this%work2=dv/dz
529#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
530#ifdef HAVE_HIP
531 call hip_dudxyz(work1%x_d, u3%x_d, &
532 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
533 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
534 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
535#elif HAVE_CUDA
536 call cuda_dudxyz(work1%x_d, u3%x_d, &
537 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
538 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
539 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
540#elif HAVE_OPENCL
541 call opencl_dudxyz(work1%x_d, u3%x_d, &
542 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
543 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
544 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
545#endif
546 if (gdim .eq. 3) then
547#ifdef HAVE_HIP
548 call hip_dudxyz(work2%x_d, u2%x_d, &
549 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
550 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
551 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
552#elif HAVE_CUDA
553 call cuda_dudxyz(work2%x_d, u2%x_d, &
554 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
555 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
556 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
557#elif HAVE_OPENCL
558 call opencl_dudxyz(work2%x_d, u2%x_d, &
559 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
560 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
561 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
562#endif
563 call device_sub3(w1%x_d, work1%x_d, work2%x_d, n)
564 else
565 call device_copy(w1%x_d, work1%x_d, n)
566 endif
567 ! this%work1=du/dz ; this%work2=dw/dx
568 if (gdim .eq. 3) then
569#ifdef HAVE_HIP
570 call hip_dudxyz(work1%x_d, u1%x_d, &
571 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
572 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
573 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
574 call hip_dudxyz(work2%x_d, u3%x_d, &
575 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
576 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
577 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
578#elif HAVE_CUDA
579 call cuda_dudxyz(work1%x_d, u1%x_d, &
580 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
581 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
582 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
583 call cuda_dudxyz(work2%x_d, u3%x_d, &
584 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
585 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
586 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
587#elif HAVE_OPENCL
588 call opencl_dudxyz(work1%x_d, u1%x_d, &
589 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
590 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
591 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
592 call opencl_dudxyz(work2%x_d, u3%x_d, &
593 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
594 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
595 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
596#endif
597 call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
598 else
599 call device_rzero (work1%x_d, n)
600#ifdef HAVE_HIP
601 call hip_dudxyz(work2%x_d, u3%x_d, &
602 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
603 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
604 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
605#elif HAVE_CUDA
606 call cuda_dudxyz(work2%x_d, u3%x_d, &
607 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
608 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
609 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
610#elif HAVE_OPENCL
611 call opencl_dudxyz(work2%x_d, u3%x_d, &
612 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
613 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
614 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
615#endif
616 call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
617 endif
618 ! this%work1=dv/dx ; this%work2=du/dy
619#ifdef HAVE_HIP
620 call hip_dudxyz(work1%x_d, u2%x_d, &
621 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
622 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
623 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
624 call hip_dudxyz(work2%x_d, u1%x_d, &
625 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
626 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
627 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
628#elif HAVE_CUDA
629 call cuda_dudxyz(work1%x_d, u2%x_d, &
630 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
631 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
632 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
633 call cuda_dudxyz(work2%x_d, u1%x_d, &
634 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
635 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
636 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
637#elif HAVE_OPENCL
638 call opencl_dudxyz(work1%x_d, u2%x_d, &
639 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
640 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
641 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
642 call opencl_dudxyz(work2%x_d, u1%x_d, &
643 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
644 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
645 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
646#endif
647 call device_sub3(w3%x_d, work1%x_d, work2%x_d, n)
648 !! BC dependent, Needs to change if cyclic
649
650 call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%B_d, gdim, n)
651 call c_xh%gs_h%op(w1, gs_op_add)
652 call c_xh%gs_h%op(w2, gs_op_add)
653 call c_xh%gs_h%op(w3, gs_op_add)
654 call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%Binv_d, gdim, n)
655
656#else
657 call neko_error('No device backend configured')
658#endif
659
660 end subroutine opr_device_curl
661
662 function opr_device_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl)
663 type(space_t) :: xh
664 type(coef_t) :: coef
665 integer :: nelv, gdim
666 real(kind=rp) :: dt
667 real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u, v, w
668 real(kind=rp) :: cfl
669 type(c_ptr) :: u_d, v_d, w_d
670
671 u_d = device_get_ptr(u)
672 v_d = device_get_ptr(v)
673 w_d = device_get_ptr(w)
674
675#ifdef HAVE_HIP
676 cfl = hip_cfl(dt, u_d, v_d, w_d, &
677 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
678 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
679 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
680 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
681 coef%jacinv_d, nelv, xh%lx)
682#elif HAVE_CUDA
683 cfl = cuda_cfl(dt, u_d, v_d, w_d, &
684 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
685 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
686 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
687 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
688 coef%jacinv_d, nelv, xh%lx)
689#elif HAVE_OPENCL
690 cfl = opencl_cfl(dt, u_d, v_d, w_d, &
691 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
692 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
693 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
694 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
695 coef%jacinv_d, nelv, xh%lx)
696#else
697 cfl = 0.0_rp
698 call neko_error('No device backend configured')
699#endif
700 end function opr_device_cfl
701
702end module opr_device
703
Return the device pointer for an associated Fortran array.
Definition device.F90:81
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
subroutine, public device_rzero(a_d, n)
Zero a real vector.
subroutine, public device_sub3(a_d, b_d, c_d, n)
Vector subtraction .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
subroutine, public device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
Defines a field.
Definition field.f90:34
Gather-scatter.
A simulation component that computes lambda2 The values are stored in the field registry under the na...
Definition lambda2.f90:37
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators accelerator backends.
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
subroutine, public opr_device_cdtp(dtx, x, dr, ds, dt, coef)
real(kind=rp) function, public opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public opr_device_dudxyz(du, u, dr, ds, dt, coef)
subroutine, public opr_device_opgrad(ux, uy, uz, u, coef)
subroutine, public opr_device_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
subroutine, public opr_device_lambda2(lambda2, u, v, w, coef)
Defines a function space.
Definition space.f90:34
Utilities.
Definition utils.f90:35
void opencl_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
Definition opr_cdtp.c:52
void cuda_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
Definition opr_cdtp.cu:57
real opencl_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
Definition opr_cfl.c:54
real cuda_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
Definition opr_cfl.cu:54
void opencl_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
Definition opr_conv1.c:52
void cuda_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
Definition opr_conv1.cu:60
void opencl_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
Definition opr_dudxyz.c:52
void cuda_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
Definition opr_dudxyz.cu:57
void opencl_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
Definition opr_lambda2.c:52
void cuda_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
void opencl_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Definition opr_opgrad.c:53
void cuda_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Definition opr_opgrad.cu:59
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
The function space for the SEM solution fields.
Definition space.f90:62