Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
opr_device.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2022, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use gather_scatter, only : gs_op_add
36 use num_types, only : rp, c_rp
38 use space, only : space_t
39 use coefs, only : coef_t
40 use field, only : field_t
41 use utils, only : neko_error
45 use, intrinsic :: iso_c_binding
46 implicit none
47 private
48
53
54#ifdef HAVE_HIP
55 interface
56 subroutine hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
57 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
58 bind(c, name = 'hip_dudxyz')
59 use, intrinsic :: iso_c_binding
60 type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
61 type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
62 integer(c_int) :: nel, lx
63 end subroutine hip_dudxyz
64 end interface
65
66 interface
67 subroutine hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
68 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
69 bind(c, name = 'hip_cdtp')
70 use, intrinsic :: iso_c_binding
71 type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
72 type(c_ptr), value :: dxt_d, dyt_d, dzt_d, w3_d
73 integer(c_int) :: nel, lx
74 end subroutine hip_cdtp
75 end interface
76
77 interface
78 subroutine hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
79 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
80 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
81 jacinv_d, nel, gdim, lx) &
82 bind(c, name = 'hip_conv1')
83 use, intrinsic :: iso_c_binding
84 type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
85 type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
86 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
87 type(c_ptr), value :: jacinv_d
88 integer(c_int) :: nel, gdim, lx
89 end subroutine hip_conv1
90 end interface
91
92 interface
93 subroutine hip_convect_scalar(du_d, u_d, cr_d, cs_d, ct_d, &
94 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'hip_convect_scalar')
95 use, intrinsic :: iso_c_binding
96 type(c_ptr), value :: du_d, u_d
97 type(c_ptr), value :: cr_d, cs_d, ct_d
98 type(c_ptr), value :: dx_d, dy_d, dz_d
99 integer(c_int) :: nel, lx
100 end subroutine hip_convect_scalar
101 end interface
102
103 interface
104 subroutine hip_opgrad(ux_d, uy_d, uz_d, u_d, &
105 dx_d, dy_d, dz_d, &
106 drdx_d, dsdx_d, dtdx_d, &
107 drdy_d, dsdy_d, dtdy_d, &
108 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
109 bind(c, name = 'hip_opgrad')
110 use, intrinsic :: iso_c_binding
111 type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
112 type(c_ptr), value :: dx_d, dy_d, dz_d
113 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
114 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
115 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
116 type(c_ptr), value :: w3_d
117 integer(c_int) :: nel, lx
118 end subroutine hip_opgrad
119 end interface
120
121 interface
122 subroutine hip_lambda2(lambda2_d, u_d, v_d, w_d, &
123 dx_d, dy_d, dz_d, &
124 drdx_d, dsdx_d, dtdx_d, &
125 drdy_d, dsdy_d, dtdy_d, &
126 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
127 bind(c, name = 'hip_lambda2')
128 use, intrinsic :: iso_c_binding
129 type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
130 type(c_ptr), value :: dx_d, dy_d, dz_d
131 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
132 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
133 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
134 type(c_ptr), value :: jacinv_d
135 integer(c_int) :: nel, lx
136 end subroutine hip_lambda2
137 end interface
138
139 interface
140 real(c_rp) function hip_cfl(dt, u_d, v_d, w_d, &
141 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
142 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
143 jacinv_d, nel, lx) &
144 bind(c, name = 'hip_cfl')
145 use, intrinsic :: iso_c_binding
146 import c_rp
147 type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
148 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
149 type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
150 real(c_rp) :: dt
151 integer(c_int) :: nel, lx
152 end function hip_cfl
153 end interface
154
155 interface
156 subroutine hip_rotate_cyc(vx_d, vy_d, vz_d, &
157 x_d, y_d, z_d, &
158 cyc_msk_d, R11_d, R12_d, ncyc, idir) &
159 bind(c, name = 'hip_rotate_cyc')
160 use, intrinsic :: iso_c_binding
161 type(c_ptr), value :: vx_d, vy_d, vz_d
162 type(c_ptr), value :: x_d, y_d, z_d
163 type(c_ptr), value :: cyc_msk_d, R11_d, R12_d
164 integer(c_int) :: ncyc, idir
165 end subroutine hip_rotate_cyc
166 end interface
167
168 interface
169 subroutine hip_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
170 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
171 dtdz_d, w3_d, nel, lx) bind(c, name = 'hip_set_convect_rst')
172 use, intrinsic :: iso_c_binding
173 type(c_ptr), value :: cr_d, cs_d, ct_d
174 type(c_ptr), value :: cx_d, cy_d, cz_d
175 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
176 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
177 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
178 type(c_ptr), value :: w3_d
179 integer(c_int) :: nel, lx
180 end subroutine hip_set_convect_rst
181 end interface
182
183#elif HAVE_CUDA
184 interface
185 subroutine cuda_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
186 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
187 bind(c, name = 'cuda_dudxyz')
188 use, intrinsic :: iso_c_binding
189 type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
190 type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
191 integer(c_int) :: nel, lx
192 end subroutine cuda_dudxyz
193 end interface
194
195 interface
196 subroutine cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
197 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
198 bind(c, name = 'cuda_cdtp')
199 use, intrinsic :: iso_c_binding
200 type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
201 type(c_ptr), value :: dxt_d, dyt_d, dzt_d, w3_d
202 integer(c_int) :: nel, lx
203 end subroutine cuda_cdtp
204 end interface
205
206 interface
207 subroutine cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
208 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
209 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
210 jacinv_d, nel, gdim, lx) &
211 bind(c, name = 'cuda_conv1')
212 use, intrinsic :: iso_c_binding
213 type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
214 type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
215 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
216 type(c_ptr), value :: jacinv_d
217 integer(c_int) :: nel, gdim, lx
218 end subroutine cuda_conv1
219 end interface
220
221 interface
222 subroutine cuda_convect_scalar(du_d, u_d, cr_d, cs_d, ct_d, &
223 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'cuda_convect_scalar')
224 use, intrinsic :: iso_c_binding
225 type(c_ptr), value :: du_d, u_d
226 type(c_ptr), value :: cr_d, cs_d, ct_d
227 type(c_ptr), value :: dx_d, dy_d, dz_d
228 integer(c_int) :: nel, lx
229 end subroutine cuda_convect_scalar
230 end interface
231
232 interface
233 subroutine cuda_opgrad(ux_d, uy_d, uz_d, u_d, &
234 dx_d, dy_d, dz_d, &
235 drdx_d, dsdx_d, dtdx_d, &
236 drdy_d, dsdy_d, dtdy_d, &
237 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
238 bind(c, name = 'cuda_opgrad')
239 use, intrinsic :: iso_c_binding
240 type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
241 type(c_ptr), value :: dx_d, dy_d, dz_d
242 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
243 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
244 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
245 type(c_ptr), value :: w3_d
246 integer(c_int) :: nel, lx
247 end subroutine cuda_opgrad
248 end interface
249
250 interface
251 subroutine cuda_lambda2(lambda2_d, u_d, v_d, w_d, &
252 dx_d, dy_d, dz_d, &
253 drdx_d, dsdx_d, dtdx_d, &
254 drdy_d, dsdy_d, dtdy_d, &
255 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
256 bind(c, name = 'cuda_lambda2')
257 use, intrinsic :: iso_c_binding
258 type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
259 type(c_ptr), value :: dx_d, dy_d, dz_d
260 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
261 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
262 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
263 type(c_ptr), value :: jacinv_d
264 integer(c_int) :: nel, lx
265 end subroutine cuda_lambda2
266 end interface
267
268 interface
269 real(c_rp) function cuda_cfl(dt, u_d, v_d, w_d, &
270 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
271 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
272 jacinv_d, nel, lx) &
273 bind(c, name = 'cuda_cfl')
274 use, intrinsic :: iso_c_binding
275 import c_rp
276 type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
277 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
278 type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
279 real(c_rp) :: dt
280 integer(c_int) :: nel, lx
281 end function cuda_cfl
282 end interface
283
284 interface
285 subroutine cuda_rotate_cyc(vx_d, vy_d, vz_d, &
286 x_d, y_d, z_d, &
287 cyc_msk_d, R11_d, R12_d, ncyc, idir) &
288 bind(c, name = 'cuda_rotate_cyc')
289 use, intrinsic :: iso_c_binding
290 type(c_ptr), value :: vx_d, vy_d, vz_d
291 type(c_ptr), value :: x_d, y_d, z_d
292 type(c_ptr), value :: cyc_msk_d, R11_d, R12_d
293 integer(c_int) :: ncyc, idir
294 end subroutine cuda_rotate_cyc
295 end interface
296
297 interface
298 subroutine cuda_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
299 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
300 dtdz_d, w3_d, nel, lx) bind(c, name = 'cuda_set_convect_rst')
301 use, intrinsic :: iso_c_binding
302 type(c_ptr), value :: cr_d, cs_d, ct_d
303 type(c_ptr), value :: cx_d, cy_d, cz_d
304 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
305 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
306 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
307 type(c_ptr), value :: w3_d
308 integer(c_int) :: nel, lx
309 end subroutine cuda_set_convect_rst
310 end interface
311
312#elif HAVE_OPENCL
313 interface
314 subroutine opencl_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
315 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
316 bind(c, name = 'opencl_dudxyz')
317 use, intrinsic :: iso_c_binding
318 type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
319 type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
320 integer(c_int) :: nel, lx
321 end subroutine opencl_dudxyz
322 end interface
323
324 interface
325 subroutine opencl_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
326 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
327 bind(c, name = 'opencl_cdtp')
328 use, intrinsic :: iso_c_binding
329 type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
330 type(c_ptr), value :: dxt_d, dyt_d, dzt_d, w3_d
331 integer(c_int) :: nel, lx
332 end subroutine opencl_cdtp
333 end interface
334
335 interface
336 subroutine opencl_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
337 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
338 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
339 jacinv_d, nel, gdim, lx) &
340 bind(c, name = 'opencl_conv1')
341 use, intrinsic :: iso_c_binding
342 type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
343 type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
344 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
345 type(c_ptr), value :: jacinv_d
346 integer(c_int) :: nel, gdim, lx
347 end subroutine opencl_conv1
348 end interface
349
350 interface
351 subroutine opencl_convect_scalar(du_d, u_d, cr_d, cs_d, ct_d, &
352 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'opencl_convect_scalar')
353 use, intrinsic :: iso_c_binding
354 type(c_ptr), value :: du_d, u_d
355 type(c_ptr), value :: cr_d, cs_d, ct_d
356 type(c_ptr), value :: dx_d, dy_d, dz_d
357 integer(c_int) :: nel, lx
358 end subroutine opencl_convect_scalar
359 end interface
360
361 interface
362 subroutine opencl_opgrad(ux_d, uy_d, uz_d, u_d, &
363 dx_d, dy_d, dz_d, &
364 drdx_d, dsdx_d, dtdx_d, &
365 drdy_d, dsdy_d, dtdy_d, &
366 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
367 bind(c, name = 'opencl_opgrad')
368 use, intrinsic :: iso_c_binding
369 type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
370 type(c_ptr), value :: dx_d, dy_d, dz_d
371 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
372 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
373 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
374 type(c_ptr), value :: w3_d
375 integer(c_int) :: nel, lx
376 end subroutine opencl_opgrad
377 end interface
378
379 interface
380 real(c_rp) function opencl_cfl(dt, u_d, v_d, w_d, &
381 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
382 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
383 jacinv_d, nel, lx) &
384 bind(c, name = 'opencl_cfl')
385 use, intrinsic :: iso_c_binding
386 import c_rp
387 type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
388 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
389 type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
390 real(c_rp) :: dt
391 integer(c_int) :: nel, lx
392 end function opencl_cfl
393 end interface
394
395 interface
396 subroutine opencl_lambda2(lambda2_d, u_d, v_d, w_d, &
397 dx_d, dy_d, dz_d, &
398 drdx_d, dsdx_d, dtdx_d, &
399 drdy_d, dsdy_d, dtdy_d, &
400 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
401 bind(c, name = 'opencl_lambda2')
402 use, intrinsic :: iso_c_binding
403 type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
404 type(c_ptr), value :: dx_d, dy_d, dz_d
405 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
406 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
407 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
408 type(c_ptr), value :: jacinv_d
409 integer(c_int) :: nel, lx
410 end subroutine opencl_lambda2
411 end interface
412
413 interface
414 subroutine opencl_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
415 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
416 dtdz_d, w3_d, nel, lx) bind(c, name = 'opencl_set_convect_rst')
417 use, intrinsic :: iso_c_binding
418 type(c_ptr), value :: cr_d, cs_d, ct_d
419 type(c_ptr), value :: cx_d, cy_d, cz_d
420 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
421 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
422 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
423 type(c_ptr), value :: w3_d
424 integer(c_int) :: nel, lx
425 end subroutine opencl_set_convect_rst
426 end interface
427
428#endif
429
430contains
431
432 subroutine opr_device_dudxyz(du, u, dr, ds, dt, coef)
433 type(coef_t), intent(in), target :: coef
434 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, & coef%Xh%lz, coef%msh%nelv), intent(inout) :: du
435 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, & coef%Xh%lz, coef%msh%nelv), intent(in) :: u, dr, ds, dt
436 type(c_ptr) :: du_d, u_d, dr_d, ds_d, dt_d
437
438 du_d = device_get_ptr(du)
439 u_d = device_get_ptr(u)
440
441 dr_d = device_get_ptr(dr)
442 ds_d = device_get_ptr(ds)
443 dt_d = device_get_ptr(dt)
444
445 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
446#ifdef HAVE_HIP
447 call hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
448 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
449 msh%nelv, xh%lx)
450#elif HAVE_CUDA
451 call cuda_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
452 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
453 msh%nelv, xh%lx)
454#elif HAVE_OPENCL
455 call opencl_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
456 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
457 msh%nelv, xh%lx)
458#else
459 call neko_error('No device backend configured')
460#endif
461 end associate
462
463 end subroutine opr_device_dudxyz
464
465 subroutine opr_device_opgrad(ux, uy, uz, u, coef)
466 type(coef_t), intent(in) :: coef
467 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: ux
468 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uy
469 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uz
470 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: u
471 type(c_ptr) :: ux_d, uy_d, uz_d, u_d
472
473 ux_d = device_get_ptr(ux)
474 uy_d = device_get_ptr(uy)
475 uz_d = device_get_ptr(uz)
476
477 u_d = device_get_ptr(u)
478
479 associate(xh => coef%Xh, msh => coef%msh)
480#ifdef HAVE_HIP
481 call hip_opgrad(ux_d, uy_d, uz_d, u_d, &
482 xh%dx_d, xh%dy_d, xh%dz_d, &
483 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
484 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
485 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
486 xh%w3_d, msh%nelv, xh%lx)
487#elif HAVE_CUDA
488 call cuda_opgrad(ux_d, uy_d, uz_d, u_d, &
489 xh%dx_d, xh%dy_d, xh%dz_d, &
490 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
491 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
492 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
493 xh%w3_d, msh%nelv, xh%lx)
494#elif HAVE_OPENCL
495 call opencl_opgrad(ux_d, uy_d, uz_d, u_d, &
496 xh%dx_d, xh%dy_d, xh%dz_d, &
497 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
498 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
499 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
500 xh%w3_d, msh%nelv, xh%lx)
501#else
502 call neko_error('No device backend configured')
503#endif
504 end associate
505
506 end subroutine opr_device_opgrad
507 subroutine opr_device_lambda2(lambda2, u, v, w, coef)
508 type(coef_t), intent(in) :: coef
509 type(field_t), intent(inout) :: lambda2
510 type(field_t), intent(in) :: u, v, w
511#ifdef HAVE_HIP
512 call hip_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
513 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
514 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
515 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
516 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
517 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
518#elif HAVE_CUDA
519 call cuda_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
520 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
521 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
522 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
523 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
524 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
525#elif HAVE_OPENCL
526 call opencl_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
527 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
528 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
529 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
530 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
531 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
532#else
533 call neko_error('No device backend configured')
534#endif
535 end subroutine opr_device_lambda2
536
537 subroutine opr_device_cdtp(dtx, x, dr, ds, dt, coef)
538 type(coef_t), intent(in) :: coef
539 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: dtx
540 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: x
541 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dr
542 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: ds
543 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dt
544 type(c_ptr) :: dtx_d, x_d, dr_d, ds_d, dt_d
545
546 dtx_d = device_get_ptr(dtx)
547 x_d = device_get_ptr(x)
548
549 dr_d = device_get_ptr(dr)
550 ds_d = device_get_ptr(ds)
551 dt_d = device_get_ptr(dt)
552
553 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
554#ifdef HAVE_HIP
555 call hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
556 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
557 msh%nelv, xh%lx)
558#elif HAVE_CUDA
559 call cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
560 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
561 msh%nelv, xh%lx)
562#elif HAVE_OPENCL
563 call opencl_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
564 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
565 msh%nelv, xh%lx)
566#else
567 call neko_error('No device backend configured')
568#endif
569 end associate
570
571 end subroutine opr_device_cdtp
572
573 subroutine opr_device_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim)
574 type(space_t), intent(in) :: xh
575 type(coef_t), intent(in) :: coef
576 integer, intent(in) :: nelv, gdim
577 real(kind=rp), intent(inout) :: du(xh%lxyz, nelv)
578 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u
579 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vx
580 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vy
581 real(kind=rp), intent(inout), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vz
582 type(c_ptr) :: du_d, u_d, vx_d, vy_d, vz_d
583
584 du_d = device_get_ptr(du)
585 u_d = device_get_ptr(u)
586
587 vx_d = device_get_ptr(vx)
588 vy_d = device_get_ptr(vy)
589 vz_d = device_get_ptr(vz)
590
591 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
592#ifdef HAVE_HIP
593 call hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
594 xh%dx_d, xh%dy_d, xh%dz_d, &
595 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
596 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
597 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
598 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
599#elif HAVE_CUDA
600 call cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
601 xh%dx_d, xh%dy_d, xh%dz_d, &
602 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
603 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
604 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
605 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
606#elif HAVE_OPENCL
607 call opencl_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
608 xh%dx_d, xh%dy_d, xh%dz_d, &
609 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
610 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
611 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
612 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
613#else
614 call neko_error('No device backend configured')
615#endif
616 end associate
617
618 end subroutine opr_device_conv1
619
620 subroutine opr_device_convect_scalar(du, u_d, cr_d, cs_d, ct_d, &
621 Xh_GLL, Xh_GL, coef_GLL, coef_GL, GLL_to_GL)
622 type(space_t), intent(in) :: xh_gl
623 type(space_t), intent(in) :: xh_gll
624 type(coef_t), intent(in) :: coef_gll
625 type(coef_t), intent(in) :: coef_gl
626 type(interpolator_t), intent(inout) :: gll_to_gl
627 real(kind=rp), intent(inout) :: &
628 du(xh_gll%lx, xh_gll%ly, xh_gll%lz, coef_gl%msh%nelv)
629 type(c_ptr) :: cr_d, cs_d, ct_d, u_d
630 real(kind=rp) :: ud(xh_gl%lx*xh_gl%lx*xh_gl%lx)
631 type(c_ptr) :: du_d, ud_d
632 integer :: n_gl, n_gll
633
634 n_gll = coef_gl%msh%nelv * xh_gl%lxyz
635 n_gll = coef_gl%msh%nelv * xh_gll%lxyz
636
637 call device_map(ud, ud_d, n_gl)
638
639 du_d = device_get_ptr(du)
640
641 associate(xh => xh_gl, nelv => coef_gl%msh%nelv, lx => xh_gl%lx)
642#ifdef HAVE_HIP
643 call hip_convect_scalar(ud_d, u_d, cr_d, cs_d, ct_d, &
644 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
645#elif HAVE_CUDA
646 call cuda_convect_scalar(ud_d, u_d, cr_d, cs_d, ct_d, &
647 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
648#elif HAVE_OPENCL
649 call opencl_convect_scalar(ud_d, u_d, cr_d, cs_d, ct_d, &
650 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
651#else
652 call neko_error('No device backend configured')
653#endif
654
655 call gll_to_gl%map(du, ud, nelv, xh_gll)
656 call coef_gll%gs_h%op(du, n_gll, gs_op_add)
657 call device_col2(du_d, coef_gll%Binv_d, n_gll)
658
659 end associate
660
661 call device_free(ud_d)
662
663 end subroutine opr_device_convect_scalar
664
665 subroutine opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh, event)
666 type(field_t), intent(inout) :: w1
667 type(field_t), intent(inout) :: w2
668 type(field_t), intent(inout) :: w3
669 type(field_t), intent(in) :: u1
670 type(field_t), intent(in) :: u2
671 type(field_t), intent(in) :: u3
672 type(field_t), intent(inout) :: work1
673 type(field_t), intent(inout) :: work2
674 type(coef_t), intent(in) :: c_xh
675 type(c_ptr), optional, intent(inout) :: event
676 integer :: gdim, n, nelv
677
678 n = w1%dof%size()
679 gdim = c_xh%msh%gdim
680 nelv = c_xh%msh%nelv
681
682 ! this%work1=dw/dy ; this%work2=dv/dz
683#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
684#ifdef HAVE_HIP
685 call hip_dudxyz(work1%x_d, u3%x_d, &
686 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
687 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
688 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
689#elif HAVE_CUDA
690 call cuda_dudxyz(work1%x_d, u3%x_d, &
691 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
692 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
693 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
694#elif HAVE_OPENCL
695 call opencl_dudxyz(work1%x_d, u3%x_d, &
696 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
697 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
698 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
699#endif
700 if (gdim .eq. 3) then
701#ifdef HAVE_HIP
702 call hip_dudxyz(work2%x_d, u2%x_d, &
703 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
704 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
705 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
706#elif HAVE_CUDA
707 call cuda_dudxyz(work2%x_d, u2%x_d, &
708 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
709 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
710 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
711#elif HAVE_OPENCL
712 call opencl_dudxyz(work2%x_d, u2%x_d, &
713 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
714 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
715 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
716#endif
717 call device_sub3(w1%x_d, work1%x_d, work2%x_d, n)
718 else
719 call device_copy(w1%x_d, work1%x_d, n)
720 endif
721 ! this%work1=du/dz ; this%work2=dw/dx
722 if (gdim .eq. 3) then
723#ifdef HAVE_HIP
724 call hip_dudxyz(work1%x_d, u1%x_d, &
725 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
726 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
727 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
728 call hip_dudxyz(work2%x_d, u3%x_d, &
729 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
730 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
731 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
732#elif HAVE_CUDA
733 call cuda_dudxyz(work1%x_d, u1%x_d, &
734 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
735 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
736 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
737 call cuda_dudxyz(work2%x_d, u3%x_d, &
738 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
739 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
740 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
741#elif HAVE_OPENCL
742 call opencl_dudxyz(work1%x_d, u1%x_d, &
743 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
744 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
745 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
746 call opencl_dudxyz(work2%x_d, u3%x_d, &
747 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
748 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
749 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
750#endif
751 call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
752 else
753 call device_rzero (work1%x_d, n)
754#ifdef HAVE_HIP
755 call hip_dudxyz(work2%x_d, u3%x_d, &
756 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
757 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
758 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
759#elif HAVE_CUDA
760 call cuda_dudxyz(work2%x_d, u3%x_d, &
761 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
762 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
763 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
764#elif HAVE_OPENCL
765 call opencl_dudxyz(work2%x_d, u3%x_d, &
766 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
767 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
768 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
769#endif
770 call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
771 endif
772 ! this%work1=dv/dx ; this%work2=du/dy
773#ifdef HAVE_HIP
774 call hip_dudxyz(work1%x_d, u2%x_d, &
775 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
776 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
777 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
778 call hip_dudxyz(work2%x_d, u1%x_d, &
779 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
780 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
781 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
782#elif HAVE_CUDA
783 call cuda_dudxyz(work1%x_d, u2%x_d, &
784 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
785 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
786 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
787 call cuda_dudxyz(work2%x_d, u1%x_d, &
788 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
789 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
790 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
791#elif HAVE_OPENCL
792 call opencl_dudxyz(work1%x_d, u2%x_d, &
793 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
794 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
795 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
796 call opencl_dudxyz(work2%x_d, u1%x_d, &
797 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
798 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
799 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
800#endif
801 call device_sub3(w3%x_d, work1%x_d, work2%x_d, n)
802 !! BC dependent, Needs to change if cyclic
803
804 call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%B_d, gdim, n)
805
806 if (present(event)) then
807 if(c_xh%cyclic) call opr_device_rotate_cyc_r4(w1%x, w2%x, w3%x, 1, c_xh)
808 call c_xh%gs_h%op(w1, gs_op_add, event)
809 call device_event_sync(event)
810 call c_xh%gs_h%op(w2, gs_op_add, event)
811 call device_event_sync(event)
812 call c_xh%gs_h%op(w3, gs_op_add, event)
813 call device_event_sync(event)
814 if(c_xh%cyclic) call opr_device_rotate_cyc_r4(w1%x, w2%x, w3%x, 0, c_xh)
815 else
816 if(c_xh%cyclic) call opr_device_rotate_cyc_r4(w1%x, w2%x, w3%x, 1, c_xh)
817 call c_xh%gs_h%op(w1, gs_op_add)
818 call c_xh%gs_h%op(w2, gs_op_add)
819 call c_xh%gs_h%op(w3, gs_op_add)
820 if(c_xh%cyclic) call opr_device_rotate_cyc_r4(w1%x, w2%x, w3%x, 0, c_xh)
821 end if
822
823 call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%Binv_d, gdim, n)
824
825#else
826 call neko_error('No device backend configured')
827#endif
828
829 end subroutine opr_device_curl
830
831 function opr_device_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl)
832 type(space_t) :: xh
833 type(coef_t) :: coef
834 integer :: nelv, gdim
835 real(kind=rp) :: dt
836 real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u, v, w
837 real(kind=rp) :: cfl
838 type(c_ptr) :: u_d, v_d, w_d
839
840 u_d = device_get_ptr(u)
841 v_d = device_get_ptr(v)
842 w_d = device_get_ptr(w)
843
844#ifdef HAVE_HIP
845 cfl = hip_cfl(dt, u_d, v_d, w_d, &
846 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
847 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
848 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
849 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
850 coef%jacinv_d, nelv, xh%lx)
851#elif HAVE_CUDA
852 cfl = cuda_cfl(dt, u_d, v_d, w_d, &
853 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
854 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
855 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
856 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
857 coef%jacinv_d, nelv, xh%lx)
858#elif HAVE_OPENCL
859 cfl = opencl_cfl(dt, u_d, v_d, w_d, &
860 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
861 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
862 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
863 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
864 coef%jacinv_d, nelv, xh%lx)
865#else
866 cfl = 0.0_rp
867 call neko_error('No device backend configured')
868#endif
869 end function opr_device_cfl
870
871 subroutine opr_device_rotate_cyc_r1(vx, vy, vz, idir, coef)
872 type(coef_t) :: coef
873 integer :: idir, ncyc
874 real(rp), dimension(coef%Xh%lx*coef%Xh%ly*coef%Xh%lz*coef%msh%nelv) :: &
875 vx, vy, vz
876 type(c_ptr) :: vx_d, vy_d, vz_d
877
878 vx_d = device_get_ptr(vx)
879 vy_d = device_get_ptr(vy)
880 vz_d = device_get_ptr(vz)
881 ncyc = coef%cyc_msk(0) - 1
882
883#ifdef HAVE_HIP
884 call hip_rotate_cyc(vx_d, vy_d, vz_d, &
885 coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
886 coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
887 ncyc, idir)
888#elif HAVE_CUDA
889 call cuda_rotate_cyc(vx_d, vy_d, vz_d, &
890 coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
891 coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
892 ncyc, idir)
893#elif HAVE_OPENCL
894
895 call neko_error('No device backend configured for rotate_cyc')
896#else
897 call neko_error('No device backend configured for rotate_cyc')
898#endif
899 end subroutine opr_device_rotate_cyc_r1
900
901 subroutine opr_device_rotate_cyc_r4(vx, vy, vz, idir, coef)
902 type(coef_t) :: coef
903 integer :: idir, ncyc
904 real(rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: &
905 vx, vy, vz
906 type(c_ptr) :: vx_d, vy_d, vz_d
907
908 vx_d = device_get_ptr(vx)
909 vy_d = device_get_ptr(vy)
910 vz_d = device_get_ptr(vz)
911 ncyc = coef%cyc_msk(0) - 1
912
913#ifdef HAVE_HIP
914 call hip_rotate_cyc(vx_d, vy_d, vz_d, &
915 coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
916 coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
917 ncyc, idir)
918#elif HAVE_CUDA
919 call cuda_rotate_cyc(vx_d, vy_d, vz_d, &
920 coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
921 coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
922 ncyc, idir)
923#elif HAVE_OPENCL
924
925 call neko_error('No device backend configured for rotate_cyc')
926#else
927 call neko_error('No device backend configured for rotate_cyc')
928#endif
929 end subroutine opr_device_rotate_cyc_r4
930
931 subroutine opr_device_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
932 Xh, coef)
933 type(space_t), intent(inout) :: xh
934 type(coef_t), intent(inout) :: coef
935 type(c_ptr) :: cr_d, cs_d, ct_d, cx_d, cy_d, cz_d
936
937#ifdef HAVE_HIP
938 call hip_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
939 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
940 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
941 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
942 xh%w3_d, coef%msh%nelv, xh%lx)
943#elif HAVE_CUDA
944 call cuda_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
945 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
946 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
947 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
948 xh%w3_d, coef%msh%nelv, xh%lx)
949#elif HAVE_OPENCL
950 call opencl_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
951 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
952 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
953 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
954 xh%w3_d, coef%msh%nelv, xh%lx)
955#else
956 call neko_error('No device backend configured')
957#endif
958
959 end subroutine opr_device_set_convect_rst
960
961end module opr_device
962
Return the device pointer for an associated Fortran array.
Definition device.F90:101
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Coefficients.
Definition coef.f90:34
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction .
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1314
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
Defines a field.
Definition field.f90:34
Gather-scatter.
Routines to interpolate between different spaces.
A simulation component that computes lambda2 The values are stored in the field registry under the na...
Definition lambda2.f90:37
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators accelerator backends.
subroutine, public opr_device_convect_scalar(du, u_d, cr_d, cs_d, ct_d, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
subroutine, public opr_device_rotate_cyc_r1(vx, vy, vz, idir, coef)
subroutine, public opr_device_cdtp(dtx, x, dr, ds, dt, coef)
subroutine, public opr_device_rotate_cyc_r4(vx, vy, vz, idir, coef)
real(kind=rp) function, public opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh, event)
subroutine, public opr_device_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, xh, coef)
subroutine, public opr_device_dudxyz(du, u, dr, ds, dt, coef)
subroutine, public opr_device_opgrad(ux, uy, uz, u, coef)
subroutine, public opr_device_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
subroutine, public opr_device_lambda2(lambda2, u, v, w, coef)
Defines a function space.
Definition space.f90:34
Utilities.
Definition utils.f90:35
void opencl_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
Definition opr_cdtp.c:57
void cuda_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
Definition opr_cdtp.cu:57
real opencl_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
Definition opr_cfl.c:54
real cuda_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
Definition opr_cfl.cu:64
void opencl_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
Definition opr_conv1.c:57
void cuda_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
Definition opr_conv1.cu:60
void opencl_convect_scalar(void *du, void *u, void *cr, void *cs, void *ct, void *dx, void *dy, void *dz, int *nel, int *lx)
void cuda_convect_scalar(void *du, void *u, void *cr, void *cs, void *ct, void *dx, void *dy, void *dz, int *nel, int *lx)
void opencl_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
Definition opr_dudxyz.c:57
void cuda_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
Definition opr_dudxyz.cu:57
void opencl_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
Definition opr_lambda2.c:53
void cuda_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
void opencl_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Definition opr_opgrad.c:57
void cuda_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Definition opr_opgrad.cu:59
void cuda_rotate_cyc(void *vx, void *vy, void *vz, void *x, void *y, void *z, void *cyc_msk, void *R11, void *R12, int *ncyc, int *idir)
void opencl_set_convect_rst(void *cr, void *cs, void *ct, void *cx, void *cy, void *cz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
void cuda_set_convect_rst(void *cr, void *cs, void *ct, void *cx, void *cy, void *cz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:56
Interpolation between two space::space_t.
The function space for the SEM solution fields.
Definition space.f90:63