Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
opr_device.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2026, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use gather_scatter, only : gs_op_add
36 use num_types, only : rp, c_rp, i8
38 use space, only : space_t
39 use coefs, only : coef_t
40 use field, only : field_t
41 use utils, only : neko_error
46 use, intrinsic :: iso_c_binding
47 implicit none
48 private
49
54
55#ifdef HAVE_HIP
interface
  !> Explicit interface to the C-bound routine `hip_dudxyz` (HIP build only).
  subroutine hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
       dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
       bind(c, name = 'hip_dudxyz')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: nel, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: du_d, u_d
    type(c_ptr), value :: dr_d, ds_d, dt_d
    type(c_ptr), value :: dx_d, dy_d, dz_d
    type(c_ptr), value :: jacinv_d
  end subroutine hip_dudxyz
end interface
66
interface
  !> Explicit interface to the C-bound routine `hip_cdtp` (HIP build only).
  subroutine hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
       dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
       bind(c, name = 'hip_cdtp')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: nel, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: dtx_d, x_d
    type(c_ptr), value :: dr_d, ds_d, dt_d
    type(c_ptr), value :: dxt_d, dyt_d, dzt_d
    type(c_ptr), value :: w3_d
  end subroutine hip_cdtp
end interface
77
interface
  !> Explicit interface to the C-bound routine `hip_conv1` (HIP build only).
  subroutine hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
       dx_d, dy_d, dz_d, &
       drdx_d, dsdx_d, dtdx_d, &
       drdy_d, dsdy_d, dtdy_d, &
       drdz_d, dsdz_d, dtdz_d, &
       jacinv_d, nel, gdim, lx) &
       bind(c, name = 'hip_conv1')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: nel, gdim, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: du_d, u_d
    type(c_ptr), value :: vx_d, vy_d, vz_d
    type(c_ptr), value :: dx_d, dy_d, dz_d
    type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
    type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
    type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
    type(c_ptr), value :: jacinv_d
  end subroutine hip_conv1
end interface
92
interface
  !> Explicit interface to the C-bound routine `hip_convect_scalar`
  !! (HIP build only).
  subroutine hip_convect_scalar(du_d, u_d, &
       cr_d, cs_d, ct_d, &
       dx_d, dy_d, dz_d, &
       nel, lx) &
       bind(c, name = 'hip_convect_scalar')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: nel, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: du_d, u_d
    type(c_ptr), value :: cr_d, cs_d, ct_d
    type(c_ptr), value :: dx_d, dy_d, dz_d
  end subroutine hip_convect_scalar
end interface
103
interface
  !> Explicit interface to the C-bound routine `hip_opgrad` (HIP build only).
  subroutine hip_opgrad(ux_d, uy_d, uz_d, u_d, &
       dx_d, dy_d, dz_d, &
       drdx_d, dsdx_d, dtdx_d, &
       drdy_d, dsdy_d, dtdy_d, &
       drdz_d, dsdz_d, dtdz_d, &
       w3_d, nel, lx) &
       bind(c, name = 'hip_opgrad')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: nel, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: ux_d, uy_d, uz_d
    type(c_ptr), value :: u_d
    type(c_ptr), value :: dx_d, dy_d, dz_d
    type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
    type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
    type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
    type(c_ptr), value :: w3_d
  end subroutine hip_opgrad
end interface
121
interface
  !> Explicit interface to the C-bound routine `hip_lambda2` (HIP build only).
  subroutine hip_lambda2(lambda2_d, u_d, v_d, w_d, &
       dx_d, dy_d, dz_d, &
       drdx_d, dsdx_d, dtdx_d, &
       drdy_d, dsdy_d, dtdy_d, &
       drdz_d, dsdz_d, dtdz_d, &
       jacinv_d, nel, lx) &
       bind(c, name = 'hip_lambda2')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: nel, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: lambda2_d
    type(c_ptr), value :: u_d, v_d, w_d
    type(c_ptr), value :: dx_d, dy_d, dz_d
    type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
    type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
    type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
    type(c_ptr), value :: jacinv_d
  end subroutine hip_lambda2
end interface
139
interface
  !> Explicit interface to the C-bound function `hip_cfl` (HIP build only).
  !! The result is a real of kind `c_rp`, imported from the host scope.
  real(c_rp) function hip_cfl(dt, u_d, v_d, w_d, &
       drdx_d, dsdx_d, dtdx_d, &
       drdy_d, dsdy_d, dtdy_d, &
       drdz_d, dsdz_d, dtdz_d, &
       dr_inv_d, ds_inv_d, dt_inv_d, &
       jacinv_d, nel, lx) &
       bind(c, name = 'hip_cfl')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    import :: c_rp
    implicit none
    ! No `value` attribute: these scalars are passed by reference.
    real(c_rp) :: dt
    integer(c_int) :: nel, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: u_d, v_d, w_d
    type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
    type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
    type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
    type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d
    type(c_ptr), value :: jacinv_d
  end function hip_cfl
end interface
155
interface
  !> Explicit interface to the C-bound routine `hip_rotate_cyc`
  !! (HIP build only).
  subroutine hip_rotate_cyc(vx_d, vy_d, vz_d, &
       x_d, y_d, z_d, &
       cyc_msk_d, R11_d, R12_d, &
       ncyc, idir) &
       bind(c, name = 'hip_rotate_cyc')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: ncyc, idir
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: vx_d, vy_d, vz_d
    type(c_ptr), value :: x_d, y_d, z_d
    type(c_ptr), value :: cyc_msk_d
    type(c_ptr), value :: R11_d, R12_d
  end subroutine hip_rotate_cyc
end interface
168
interface
  !> Explicit interface to the C-bound routine `hip_set_convect_rst`
  !! (HIP build only).
  subroutine hip_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
       drdx_d, dsdx_d, dtdx_d, &
       drdy_d, dsdy_d, dtdy_d, &
       drdz_d, dsdz_d, dtdz_d, &
       w3_d, nel, lx) &
       bind(c, name = 'hip_set_convect_rst')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: nel, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: cr_d, cs_d, ct_d
    type(c_ptr), value :: cx_d, cy_d, cz_d
    type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
    type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
    type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
    type(c_ptr), value :: w3_d
  end subroutine hip_set_convect_rst
end interface
183
184#elif HAVE_CUDA
interface
  !> Explicit interface to the C-bound routine `cuda_dudxyz` (CUDA build only).
  subroutine cuda_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
       dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
       bind(c, name = 'cuda_dudxyz')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: nel, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: du_d, u_d
    type(c_ptr), value :: dr_d, ds_d, dt_d
    type(c_ptr), value :: dx_d, dy_d, dz_d
    type(c_ptr), value :: jacinv_d
  end subroutine cuda_dudxyz
end interface
195
interface
  !> Explicit interface to the C-bound routine `cuda_cdtp` (CUDA build only).
  subroutine cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
       dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
       bind(c, name = 'cuda_cdtp')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: nel, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: dtx_d, x_d
    type(c_ptr), value :: dr_d, ds_d, dt_d
    type(c_ptr), value :: dxt_d, dyt_d, dzt_d
    type(c_ptr), value :: w3_d
  end subroutine cuda_cdtp
end interface
206
interface
  !> Explicit interface to the C-bound routine `cuda_conv1` (CUDA build only).
  subroutine cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
       dx_d, dy_d, dz_d, &
       drdx_d, dsdx_d, dtdx_d, &
       drdy_d, dsdy_d, dtdy_d, &
       drdz_d, dsdz_d, dtdz_d, &
       jacinv_d, nel, gdim, lx) &
       bind(c, name = 'cuda_conv1')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: nel, gdim, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: du_d, u_d
    type(c_ptr), value :: vx_d, vy_d, vz_d
    type(c_ptr), value :: dx_d, dy_d, dz_d
    type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
    type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
    type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
    type(c_ptr), value :: jacinv_d
  end subroutine cuda_conv1
end interface
221
interface
  !> Explicit interface to the C-bound routine `cuda_convect_scalar`
  !! (CUDA build only).
  subroutine cuda_convect_scalar(du_d, u_d, &
       cr_d, cs_d, ct_d, &
       dx_d, dy_d, dz_d, &
       nel, lx) &
       bind(c, name = 'cuda_convect_scalar')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: nel, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: du_d, u_d
    type(c_ptr), value :: cr_d, cs_d, ct_d
    type(c_ptr), value :: dx_d, dy_d, dz_d
  end subroutine cuda_convect_scalar
end interface
232
interface
  !> Explicit interface to the C-bound routine `cuda_opgrad` (CUDA build only).
  subroutine cuda_opgrad(ux_d, uy_d, uz_d, u_d, &
       dx_d, dy_d, dz_d, &
       drdx_d, dsdx_d, dtdx_d, &
       drdy_d, dsdy_d, dtdy_d, &
       drdz_d, dsdz_d, dtdz_d, &
       w3_d, nel, lx) &
       bind(c, name = 'cuda_opgrad')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: nel, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: ux_d, uy_d, uz_d
    type(c_ptr), value :: u_d
    type(c_ptr), value :: dx_d, dy_d, dz_d
    type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
    type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
    type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
    type(c_ptr), value :: w3_d
  end subroutine cuda_opgrad
end interface
250
interface
  !> Explicit interface to the C-bound routine `cuda_lambda2` (CUDA build only).
  subroutine cuda_lambda2(lambda2_d, u_d, v_d, w_d, &
       dx_d, dy_d, dz_d, &
       drdx_d, dsdx_d, dtdx_d, &
       drdy_d, dsdy_d, dtdy_d, &
       drdz_d, dsdz_d, dtdz_d, &
       jacinv_d, nel, lx) &
       bind(c, name = 'cuda_lambda2')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: nel, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: lambda2_d
    type(c_ptr), value :: u_d, v_d, w_d
    type(c_ptr), value :: dx_d, dy_d, dz_d
    type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
    type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
    type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
    type(c_ptr), value :: jacinv_d
  end subroutine cuda_lambda2
end interface
268
interface
  !> Explicit interface to the C-bound function `cuda_cfl` (CUDA build only).
  !! The result is a real of kind `c_rp`, imported from the host scope.
  real(c_rp) function cuda_cfl(dt, u_d, v_d, w_d, &
       drdx_d, dsdx_d, dtdx_d, &
       drdy_d, dsdy_d, dtdy_d, &
       drdz_d, dsdz_d, dtdz_d, &
       dr_inv_d, ds_inv_d, dt_inv_d, &
       jacinv_d, nel, lx) &
       bind(c, name = 'cuda_cfl')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    import :: c_rp
    implicit none
    ! No `value` attribute: these scalars are passed by reference.
    real(c_rp) :: dt
    integer(c_int) :: nel, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: u_d, v_d, w_d
    type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
    type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
    type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
    type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d
    type(c_ptr), value :: jacinv_d
  end function cuda_cfl
end interface
284
interface
  !> Explicit interface to the C-bound routine `cuda_rotate_cyc`
  !! (CUDA build only).
  subroutine cuda_rotate_cyc(vx_d, vy_d, vz_d, &
       x_d, y_d, z_d, &
       cyc_msk_d, R11_d, R12_d, &
       ncyc, idir) &
       bind(c, name = 'cuda_rotate_cyc')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: ncyc, idir
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: vx_d, vy_d, vz_d
    type(c_ptr), value :: x_d, y_d, z_d
    type(c_ptr), value :: cyc_msk_d
    type(c_ptr), value :: R11_d, R12_d
  end subroutine cuda_rotate_cyc
end interface
297
interface
  !> Explicit interface to the C-bound routine `cuda_set_convect_rst`
  !! (CUDA build only).
  subroutine cuda_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
       drdx_d, dsdx_d, dtdx_d, &
       drdy_d, dsdy_d, dtdy_d, &
       drdz_d, dsdz_d, dtdz_d, &
       w3_d, nel, lx) &
       bind(c, name = 'cuda_set_convect_rst')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: nel, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: cr_d, cs_d, ct_d
    type(c_ptr), value :: cx_d, cy_d, cz_d
    type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
    type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
    type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
    type(c_ptr), value :: w3_d
  end subroutine cuda_set_convect_rst
end interface
312
313#elif HAVE_OPENCL
interface
  !> Explicit interface to the C-bound routine `opencl_dudxyz`
  !! (OpenCL build only).
  subroutine opencl_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
       dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
       bind(c, name = 'opencl_dudxyz')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: nel, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: du_d, u_d
    type(c_ptr), value :: dr_d, ds_d, dt_d
    type(c_ptr), value :: dx_d, dy_d, dz_d
    type(c_ptr), value :: jacinv_d
  end subroutine opencl_dudxyz
end interface
324
interface
  !> Explicit interface to the C-bound routine `opencl_cdtp`
  !! (OpenCL build only).
  subroutine opencl_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
       dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
       bind(c, name = 'opencl_cdtp')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: nel, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: dtx_d, x_d
    type(c_ptr), value :: dr_d, ds_d, dt_d
    type(c_ptr), value :: dxt_d, dyt_d, dzt_d
    type(c_ptr), value :: w3_d
  end subroutine opencl_cdtp
end interface
335
interface
  !> Explicit interface to the C-bound routine `opencl_conv1`
  !! (OpenCL build only).
  subroutine opencl_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
       dx_d, dy_d, dz_d, &
       drdx_d, dsdx_d, dtdx_d, &
       drdy_d, dsdy_d, dtdy_d, &
       drdz_d, dsdz_d, dtdz_d, &
       jacinv_d, nel, gdim, lx) &
       bind(c, name = 'opencl_conv1')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: nel, gdim, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: du_d, u_d
    type(c_ptr), value :: vx_d, vy_d, vz_d
    type(c_ptr), value :: dx_d, dy_d, dz_d
    type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
    type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
    type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
    type(c_ptr), value :: jacinv_d
  end subroutine opencl_conv1
end interface
350
interface
  !> Explicit interface to the C-bound routine `opencl_convect_scalar`
  !! (OpenCL build only).
  subroutine opencl_convect_scalar(du_d, u_d, &
       cr_d, cs_d, ct_d, &
       dx_d, dy_d, dz_d, &
       nel, lx) &
       bind(c, name = 'opencl_convect_scalar')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: nel, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: du_d, u_d
    type(c_ptr), value :: cr_d, cs_d, ct_d
    type(c_ptr), value :: dx_d, dy_d, dz_d
  end subroutine opencl_convect_scalar
end interface
361
interface
  !> Explicit interface to the C-bound routine `opencl_opgrad`
  !! (OpenCL build only).
  subroutine opencl_opgrad(ux_d, uy_d, uz_d, u_d, &
       dx_d, dy_d, dz_d, &
       drdx_d, dsdx_d, dtdx_d, &
       drdy_d, dsdy_d, dtdy_d, &
       drdz_d, dsdz_d, dtdz_d, &
       w3_d, nel, lx) &
       bind(c, name = 'opencl_opgrad')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: nel, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: ux_d, uy_d, uz_d
    type(c_ptr), value :: u_d
    type(c_ptr), value :: dx_d, dy_d, dz_d
    type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
    type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
    type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
    type(c_ptr), value :: w3_d
  end subroutine opencl_opgrad
end interface
379
interface
  !> Explicit interface to the C-bound function `opencl_cfl`
  !! (OpenCL build only).
  !! The result is a real of kind `c_rp`, imported from the host scope.
  real(c_rp) function opencl_cfl(dt, u_d, v_d, w_d, &
       drdx_d, dsdx_d, dtdx_d, &
       drdy_d, dsdy_d, dtdy_d, &
       drdz_d, dsdz_d, dtdz_d, &
       dr_inv_d, ds_inv_d, dt_inv_d, &
       jacinv_d, nel, lx) &
       bind(c, name = 'opencl_cfl')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    import :: c_rp
    implicit none
    ! No `value` attribute: these scalars are passed by reference.
    real(c_rp) :: dt
    integer(c_int) :: nel, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: u_d, v_d, w_d
    type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
    type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
    type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
    type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d
    type(c_ptr), value :: jacinv_d
  end function opencl_cfl
end interface
395
interface
  !> Explicit interface to the C-bound routine `opencl_lambda2`
  !! (OpenCL build only).
  subroutine opencl_lambda2(lambda2_d, u_d, v_d, w_d, &
       dx_d, dy_d, dz_d, &
       drdx_d, dsdx_d, dtdx_d, &
       drdy_d, dsdy_d, dtdy_d, &
       drdz_d, dsdz_d, dtdz_d, &
       jacinv_d, nel, lx) &
       bind(c, name = 'opencl_lambda2')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: nel, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: lambda2_d
    type(c_ptr), value :: u_d, v_d, w_d
    type(c_ptr), value :: dx_d, dy_d, dz_d
    type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
    type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
    type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
    type(c_ptr), value :: jacinv_d
  end subroutine opencl_lambda2
end interface
413
interface
  !> Explicit interface to the C-bound routine `opencl_rotate_cyc`
  !! (OpenCL build only).
  subroutine opencl_rotate_cyc(vx_d, vy_d, vz_d, &
       x_d, y_d, z_d, &
       cyc_msk_d, R11_d, R12_d, &
       ncyc, idir) &
       bind(c, name = 'opencl_rotate_cyc')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: ncyc, idir
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: vx_d, vy_d, vz_d
    type(c_ptr), value :: x_d, y_d, z_d
    type(c_ptr), value :: cyc_msk_d
    type(c_ptr), value :: R11_d, R12_d
  end subroutine opencl_rotate_cyc
end interface
426
interface
  !> Explicit interface to the C-bound routine `opencl_set_convect_rst`
  !! (OpenCL build only).
  subroutine opencl_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
       drdx_d, dsdx_d, dtdx_d, &
       drdy_d, dsdy_d, dtdy_d, &
       drdz_d, dsdz_d, dtdz_d, &
       w3_d, nel, lx) &
       bind(c, name = 'opencl_set_convect_rst')
    use, intrinsic :: iso_c_binding, only : c_ptr, c_int
    implicit none
    ! No `value` attribute: these integers are passed by reference.
    integer(c_int) :: nel, lx
    ! Opaque C pointers, all passed by value.
    type(c_ptr), value :: cr_d, cs_d, ct_d
    type(c_ptr), value :: cx_d, cy_d, cz_d
    type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
    type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
    type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
    type(c_ptr), value :: w3_d
  end subroutine opencl_set_convect_rst
end interface
441
442#endif
443
444contains
445
!> Forward a `dudxyz` request to the device backend chosen at compile time.
!! Besides the caller's pointers, the backend receives the `dx_d`, `dy_d`,
!! `dz_d` pointers of `coef%Xh`, `coef%jacinv_d`, the element count
!! `coef%msh%nelv` and `coef%Xh%lx`.
!! @param du_d Device pointer passed as the backend's first argument
!! (presumably the result buffer; confirm against the kernel).
!! @param u_d Device pointer passed as the backend's second argument.
!! @param dr_d, ds_d, dt_d Device pointers forwarded unchanged.
!! @param coef Coefficients supplying the space, mesh and `jacinv_d`.
subroutine opr_device_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, coef)
  type(coef_t), intent(in), target :: coef
  type(c_ptr), intent(inout) :: du_d
  type(c_ptr), intent(in) :: u_d, dr_d, ds_d, dt_d

#ifdef HAVE_HIP
  call hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
       coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
       coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
#elif HAVE_CUDA
  call cuda_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
       coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
       coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
#elif HAVE_OPENCL
  call opencl_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
       coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
       coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
#else
  ! Built without HIP, CUDA or OpenCL support.
  call neko_error('No device backend configured')
#endif

end subroutine opr_device_dudxyz
470
!> Forward an `opgrad` request to the device backend chosen at compile time.
!! The backend receives the three output pointers, the input pointer, the
!! `dx_d`/`dy_d`/`dz_d` pointers of `coef%Xh`, the nine `d{r,s,t}d{x,y,z}_d`
!! pointers of `coef`, `coef%Xh%w3_d`, `coef%msh%nelv` and `coef%Xh%lx`.
!! @param ux_d, uy_d, uz_d Device pointers passed first to the backend
!! (presumably the gradient components; confirm against the kernel).
!! @param u_d Device pointer of the input field.
!! @param coef Coefficients supplying the space, mesh and metric pointers.
subroutine opr_device_opgrad(ux_d, uy_d, uz_d, u_d, coef)
  type(coef_t), intent(in), target :: coef
  type(c_ptr), intent(inout) :: ux_d, uy_d, uz_d
  type(c_ptr), intent(in) :: u_d

#ifdef HAVE_HIP
  call hip_opgrad(ux_d, uy_d, uz_d, u_d, &
       coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
       coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
       coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
       coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
       coef%Xh%w3_d, coef%msh%nelv, coef%Xh%lx)
#elif HAVE_CUDA
  call cuda_opgrad(ux_d, uy_d, uz_d, u_d, &
       coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
       coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
       coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
       coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
       coef%Xh%w3_d, coef%msh%nelv, coef%Xh%lx)
#elif HAVE_OPENCL
  call opencl_opgrad(ux_d, uy_d, uz_d, u_d, &
       coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
       coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
       coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
       coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
       coef%Xh%w3_d, coef%msh%nelv, coef%Xh%lx)
#else
  ! Built without HIP, CUDA or OpenCL support.
  call neko_error('No device backend configured')
#endif

end subroutine opr_device_opgrad
504
!> Shift a device vector by the negative of its sum divided by
!! `glb_n_points`.
!! @param x_d Device pointer of the vector, updated through `device_cadd`.
!! @param glb_n_points Divisor applied to the sum (64-bit integer).
!! @param n Length handed to `device_glsum` and `device_cadd`.
subroutine device_ortho(x_d, glb_n_points, n)
  type(c_ptr), intent(inout) :: x_d
  integer(kind=i8), intent(in) :: glb_n_points
  integer, intent(in) :: n
  real(kind=rp) :: shift

  ! Sum returned by device_glsum over the point count, negated.
  shift = -(device_glsum(x_d, n) / glb_n_points)

  ! Presumably adds `shift` to every entry of x_d; confirm in device_cadd.
  call device_cadd(x_d, shift, n)

end subroutine device_ortho
520
!> Forward a `lambda2` request to the device backend chosen at compile time.
!! The backend receives the output pointer, the three input pointers, the
!! `dx_d`/`dy_d`/`dz_d` pointers of `coef%Xh`, the nine metric pointers of
!! `coef`, `coef%jacinv_d`, `coef%msh%nelv` and `coef%Xh%lx`.
!! @param lambda2_d Device pointer passed first to the backend.
!! @param u_d, v_d, w_d Device pointers of the three input fields.
!! @param coef Coefficients supplying the space, mesh and metric pointers.
subroutine opr_device_lambda2(lambda2_d, u_d, v_d, w_d, coef)
  type(coef_t), intent(in) :: coef
  type(c_ptr), intent(inout) :: lambda2_d
  type(c_ptr), intent(in) :: u_d, v_d, w_d

  associate(xh => coef%Xh, msh => coef%msh)
#ifdef HAVE_HIP
    call hip_lambda2(lambda2_d, u_d, v_d, w_d, &
         xh%dx_d, xh%dy_d, xh%dz_d, &
         coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
         coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
         coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
         coef%jacinv_d, msh%nelv, xh%lx)
#elif HAVE_CUDA
    call cuda_lambda2(lambda2_d, u_d, v_d, w_d, &
         xh%dx_d, xh%dy_d, xh%dz_d, &
         coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
         coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
         coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
         coef%jacinv_d, msh%nelv, xh%lx)
#elif HAVE_OPENCL
    call opencl_lambda2(lambda2_d, u_d, v_d, w_d, &
         xh%dx_d, xh%dy_d, xh%dz_d, &
         coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
         coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
         coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
         coef%jacinv_d, msh%nelv, xh%lx)
#else
    ! Built without HIP, CUDA or OpenCL support.
    call neko_error('No device backend configured')
#endif
  end associate

end subroutine opr_device_lambda2
550
!> Forward a `cdtp` request to the device backend chosen at compile time.
!! The backend receives the caller's pointers plus the `dxt_d`, `dyt_d`,
!! `dzt_d` and `w3_d` pointers of `coef%Xh`, `coef%msh%nelv` and
!! `coef%Xh%lx`.
!! @param dtx_d Device pointer passed first to the backend.
!! @param x_d Device pointer passed second; declared `intent(inout)`.
!! @param dr_d, ds_d, dt_d Device pointers forwarded unchanged.
!! @param coef Coefficients supplying the space and the mesh.
subroutine opr_device_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, coef)
  type(coef_t), intent(in), target :: coef
  type(c_ptr), intent(inout) :: dtx_d, x_d
  type(c_ptr), intent(in) :: dr_d, ds_d, dt_d

#ifdef HAVE_HIP
  call hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
       coef%Xh%dxt_d, coef%Xh%dyt_d, coef%Xh%dzt_d, &
       coef%Xh%w3_d, coef%msh%nelv, coef%Xh%lx)
#elif HAVE_CUDA
  call cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
       coef%Xh%dxt_d, coef%Xh%dyt_d, coef%Xh%dzt_d, &
       coef%Xh%w3_d, coef%msh%nelv, coef%Xh%lx)
#elif HAVE_OPENCL
  call opencl_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
       coef%Xh%dxt_d, coef%Xh%dyt_d, coef%Xh%dzt_d, &
       coef%Xh%w3_d, coef%msh%nelv, coef%Xh%lx)
#else
  ! Built without HIP, CUDA or OpenCL support.
  call neko_error('No device backend configured')
#endif

end subroutine opr_device_cdtp
575
!> Forward a `conv1` request to the device backend chosen at compile time.
!! The backend receives the caller's pointers, the `dx_d`/`dy_d`/`dz_d`
!! pointers of `Xh`, the nine metric pointers of `coef`, `coef%jacinv_d`,
!! the element count and dimension of `coef%msh`, and `Xh%lx`.
!! @param du_d Device pointer passed first to the backend.
!! @param u_d, vx_d, vy_d, vz_d Device pointers forwarded unchanged.
!! @param Xh Function space supplying the derivative pointers and `lx`.
!! @param coef Coefficients supplying the mesh and metric pointers.
!! @param nelv, gdim Accepted but not read here.
!! NOTE(review): the backend is given `coef%msh%nelv` and `coef%msh%gdim`
!! rather than the `nelv`/`gdim` arguments; confirm this is intended.
subroutine opr_device_conv1(du_d, u_d, vx_d, vy_d, vz_d, Xh, coef, nelv, gdim)
  type(space_t), intent(in) :: Xh
  type(coef_t), intent(in), target :: coef
  integer, intent(in) :: nelv, gdim
  type(c_ptr), intent(inout) :: du_d
  type(c_ptr), intent(in) :: u_d, vx_d, vy_d, vz_d

#ifdef HAVE_HIP
  call hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
       Xh%dx_d, Xh%dy_d, Xh%dz_d, &
       coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
       coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
       coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
       coef%jacinv_d, coef%msh%nelv, coef%msh%gdim, Xh%lx)
#elif HAVE_CUDA
  call cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
       Xh%dx_d, Xh%dy_d, Xh%dz_d, &
       coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
       coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
       coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
       coef%jacinv_d, coef%msh%nelv, coef%msh%gdim, Xh%lx)
#elif HAVE_OPENCL
  call opencl_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
       Xh%dx_d, Xh%dy_d, Xh%dz_d, &
       coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
       coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
       coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
       coef%jacinv_d, coef%msh%nelv, coef%msh%gdim, Xh%lx)
#else
  ! Built without HIP, CUDA or OpenCL support.
  call neko_error('No device backend configured')
#endif

end subroutine opr_device_conv1
611
!> Apply the backend `convect_scalar` kernel on the GL space and map the
!! result back to the GLL space.
!! Steps, in order: map a scratch array to the device, run the backend
!! kernel into it, map the scratch data into `du` with `GLL_to_GL%map`,
!! gather-scatter `du` with `gs_op_add`, multiply by `coef_GLL%Binv_d`,
!! and free the scratch device buffer.
!! @param du Result array on the GLL space; its device pointer is looked up
!! with `device_get_ptr`.
!! @param u_d Device pointer of the input handed to the backend kernel.
!! @param cr_d, cs_d, ct_d Device pointers handed to the backend kernel.
!! @param Xh_GLL Function space giving the shape of `du`.
!! @param Xh_GL Function space supplying the derivative pointers and `lx`.
!! @param coef_GLL Coefficients supplying `gs_h` and `Binv_d`.
!! @param coef_GL Coefficients supplying the element count.
!! @param GLL_to_GL Interpolator used for the final mapping.
subroutine opr_device_convect_scalar(du, u_d, cr_d, cs_d, ct_d, &
     Xh_GLL, Xh_GL, coef_GLL, coef_GL, GLL_to_GL)
  type(space_t), intent(in) :: Xh_GL
  type(space_t), intent(in) :: Xh_GLL
  type(coef_t), intent(in) :: coef_GLL
  type(coef_t), intent(in) :: coef_GL
  type(interpolator_t), intent(inout) :: GLL_to_GL
  real(kind=rp), intent(inout) :: &
       du(Xh_GLL%lx, Xh_GLL%ly, Xh_GLL%lz, coef_GL%msh%nelv)
  ! Only forwarded by value to the backend, hence intent(in).
  type(c_ptr), intent(in) :: cr_d, cs_d, ct_d, u_d
  ! NOTE(review): `ud` has lx**3 host entries while n_GL entries are mapped
  ! below; confirm that only the device buffer is meant to be full-sized.
  real(kind=rp) :: ud(Xh_GL%lx*Xh_GL%lx*Xh_GL%lx)
  type(c_ptr) :: du_d, ud_d
  integer :: n_GL, n_GLL

  ! Sizes on the GL and the GLL space. The first assignment previously
  ! targeted n_GLL as well, leaving n_GL undefined when used below.
  n_GL = coef_GL%msh%nelv * Xh_GL%lxyz
  n_GLL = coef_GL%msh%nelv * Xh_GLL%lxyz

  ! Start from a defined (null) pointer before asking for a mapping.
  ud_d = c_null_ptr
  call device_map(ud, ud_d, n_GL)

  du_d = device_get_ptr(du)

  associate(Xh => Xh_GL, nelv => coef_GL%msh%nelv, lx => Xh_GL%lx)
#ifdef HAVE_HIP
    call hip_convect_scalar(ud_d, u_d, cr_d, cs_d, ct_d, &
         Xh%dx_d, Xh%dy_d, Xh%dz_d, nelv, lx)
#elif HAVE_CUDA
    call cuda_convect_scalar(ud_d, u_d, cr_d, cs_d, ct_d, &
         Xh%dx_d, Xh%dy_d, Xh%dz_d, nelv, lx)
#elif HAVE_OPENCL
    call opencl_convect_scalar(ud_d, u_d, cr_d, cs_d, ct_d, &
         Xh%dx_d, Xh%dy_d, Xh%dz_d, nelv, lx)
#else
    ! Built without HIP, CUDA or OpenCL support.
    call neko_error('No device backend configured')
#endif

    call GLL_to_GL%map(du, ud, nelv, Xh_GLL)
    call coef_GLL%gs_h%op(du, n_GLL, gs_op_add)
    call device_col2(du_d, coef_GLL%Binv_d, n_GLL)

  end associate

  call device_free(ud_d)

end subroutine opr_device_convect_scalar
656
!> Compute the three components `w1`, `w2`, `w3` from `u1`, `u2`, `u3` as
!! differences of `dudxyz` derivatives on the device, then weight by
!! `c_Xh%B_d`, gather-scatter with `gs_op_add`, and weight by
!! `c_Xh%Binv_d`.
!! Each derivative goes through `opr_device_dudxyz`, which dispatches to the
!! compiled backend with the same space, mesh and `jacinv_d` arguments the
!! inlined backend calls used.
!! @param w1, w2, w3 Output fields.
!! @param u1, u2, u3 Input fields.
!! @param work1, work2 Scratch fields, overwritten.
!! @param c_Xh Coefficients supplying metrics, mass matrices and `gs_h`.
!! @param event Optional device event; when present it is passed to each
!! gather-scatter call and synchronised after each one.
subroutine opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh, event)
  type(field_t), intent(inout) :: w1
  type(field_t), intent(inout) :: w2
  type(field_t), intent(inout) :: w3
  type(field_t), intent(in) :: u1
  type(field_t), intent(in) :: u2
  type(field_t), intent(in) :: u3
  type(field_t), intent(inout) :: work1
  type(field_t), intent(inout) :: work2
  type(coef_t), intent(in) :: c_Xh
  type(c_ptr), optional, intent(inout) :: event
  integer :: gdim, n

  n = w1%dof%size()
  gdim = c_Xh%msh%gdim

#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
  ! w1 = work1 - work2 with work1 = dw/dy and work2 = dv/dz.
  ! When gdim is not 3 the dv/dz term is skipped and w1 = dw/dy.
  call opr_device_dudxyz(work1%x_d, u3%x_d, &
       c_Xh%drdy_d, c_Xh%dsdy_d, c_Xh%dtdy_d, c_Xh)
  if (gdim .eq. 3) then
     call opr_device_dudxyz(work2%x_d, u2%x_d, &
          c_Xh%drdz_d, c_Xh%dsdz_d, c_Xh%dtdz_d, c_Xh)
     call device_sub3(w1%x_d, work1%x_d, work2%x_d, n)
  else
     call device_copy(w1%x_d, work1%x_d, n)
  end if

  ! w2 = work1 - work2 with work1 = du/dz and work2 = dw/dx.
  ! When gdim is not 3 the du/dz term is replaced by zero.
  if (gdim .eq. 3) then
     call opr_device_dudxyz(work1%x_d, u1%x_d, &
          c_Xh%drdz_d, c_Xh%dsdz_d, c_Xh%dtdz_d, c_Xh)
  else
     call device_rzero(work1%x_d, n)
  end if
  call opr_device_dudxyz(work2%x_d, u3%x_d, &
       c_Xh%drdx_d, c_Xh%dsdx_d, c_Xh%dtdx_d, c_Xh)
  call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)

  ! w3 = work1 - work2 with work1 = dv/dx and work2 = du/dy.
  call opr_device_dudxyz(work1%x_d, u2%x_d, &
       c_Xh%drdx_d, c_Xh%dsdx_d, c_Xh%dtdx_d, c_Xh)
  call opr_device_dudxyz(work2%x_d, u1%x_d, &
       c_Xh%drdy_d, c_Xh%dsdy_d, c_Xh%dtdy_d, c_Xh)
  call device_sub3(w3%x_d, work1%x_d, work2%x_d, n)
  !! BC dependent, Needs to change if cyclic

  call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_Xh%B_d, gdim, n)

  ! For cyclic cases the vector is rotated (idir = 1) before the
  ! gather-scatter and rotated again (idir = 0) afterwards.
  if (c_Xh%cyclic) then
     call opr_device_rotate_cyc(w1%x_d, w2%x_d, w3%x_d, 1, c_Xh)
  end if

  if (present(event)) then
     call c_Xh%gs_h%op(w1, gs_op_add, event)
     call device_event_sync(event)
     call c_Xh%gs_h%op(w2, gs_op_add, event)
     call device_event_sync(event)
     call c_Xh%gs_h%op(w3, gs_op_add, event)
     call device_event_sync(event)
  else
     call c_Xh%gs_h%op(w1, gs_op_add)
     call c_Xh%gs_h%op(w2, gs_op_add)
     call c_Xh%gs_h%op(w3, gs_op_add)
  end if

  if (c_Xh%cyclic) then
     call opr_device_rotate_cyc(w1%x_d, w2%x_d, w3%x_d, 0, c_Xh)
  end if

  call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_Xh%Binv_d, gdim, n)

#else
  ! Built without HIP, CUDA or OpenCL support.
  call neko_error('No device backend configured')
#endif

end subroutine opr_device_curl
822
!> Return the value computed by the backend `cfl` routine chosen at
!! compile time.
!! The backend receives `dt`, the three velocity pointers, the nine metric
!! pointers of `coef`, the `dr_inv_d`/`ds_inv_d`/`dt_inv_d` pointers of
!! `Xh`, `coef%jacinv_d`, `nelv` and `Xh%lx`.
!! @param dt Scalar forwarded to the backend (presumably the time step).
!! @param u_d, v_d, w_d Device pointers of the three input fields.
!! @param Xh Function space supplying the inverse spacing pointers and `lx`.
!! @param coef Coefficients supplying the metric pointers and `jacinv_d`.
!! @param nelv Element count forwarded to the backend.
!! @param gdim Accepted but not read here.
!! @return cfl Backend result; set to zero before `neko_error` is called
!! when no backend is compiled in.
function opr_device_cfl(dt, u_d, v_d, w_d, Xh, coef, nelv, gdim) result(cfl)
  ! None of the arguments is modified here, so all are intent(in).
  type(space_t), intent(in) :: Xh
  type(coef_t), intent(in) :: coef
  integer, intent(in) :: nelv, gdim
  real(kind=rp), intent(in) :: dt
  type(c_ptr), intent(in) :: u_d, v_d, w_d
  real(kind=rp) :: cfl

#ifdef HAVE_HIP
  cfl = hip_cfl(dt, u_d, v_d, w_d, &
       coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
       coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
       coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
       Xh%dr_inv_d, Xh%ds_inv_d, Xh%dt_inv_d, &
       coef%jacinv_d, nelv, Xh%lx)
#elif HAVE_CUDA
  cfl = cuda_cfl(dt, u_d, v_d, w_d, &
       coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
       coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
       coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
       Xh%dr_inv_d, Xh%ds_inv_d, Xh%dt_inv_d, &
       coef%jacinv_d, nelv, Xh%lx)
#elif HAVE_OPENCL
  cfl = opencl_cfl(dt, u_d, v_d, w_d, &
       coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
       coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
       coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
       Xh%dr_inv_d, Xh%ds_inv_d, Xh%dt_inv_d, &
       coef%jacinv_d, nelv, Xh%lx)
#else
  ! Define the result before reporting the missing backend.
  cfl = 0.0_rp
  call neko_error('No device backend configured')
#endif
end function opr_device_cfl
857
!> Forward a `rotate_cyc` request to the device backend chosen at compile
!! time, unless the cyclic count derived from `coef%cyc_msk(0)` is not
!! positive.
!! The backend receives the three vector pointers, the coordinate pointers
!! of `coef%dof`, `coef%cyc_msk_d`, `coef%R11_d`, `coef%R12_d`, the count
!! and `idir`.
!! @param vx_d, vy_d, vz_d Device pointers of the vector components.
!! @param idir Direction flag forwarded unchanged to the backend.
!! @param coef Coefficients supplying the mask, rotation and coordinate data.
subroutine opr_device_rotate_cyc(vx_d, vy_d, vz_d, idir, coef)
  type(c_ptr), intent(inout) :: vx_d, vy_d, vz_d
  integer, intent(in) :: idir
  type(coef_t), intent(in) :: coef
  integer :: n_cyclic

  ! Entry 0 of the mask minus one is used as the count
  ! (presumably entry 0 stores the mask length plus one; confirm).
  n_cyclic = coef%cyc_msk(0) - 1

  if (n_cyclic .gt. 0) then
#ifdef HAVE_HIP
     call hip_rotate_cyc(vx_d, vy_d, vz_d, &
          coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
          coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
          n_cyclic, idir)
#elif HAVE_CUDA
     call cuda_rotate_cyc(vx_d, vy_d, vz_d, &
          coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
          coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
          n_cyclic, idir)
#elif HAVE_OPENCL
     call opencl_rotate_cyc(vx_d, vy_d, vz_d, &
          coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
          coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
          n_cyclic, idir)
#else
     ! Built without HIP, CUDA or OpenCL support.
     call neko_error('No device backend configured for rotate_cyc')
#endif
  end if
end subroutine opr_device_rotate_cyc
887
!> Forward a `set_convect_rst` request to the device backend chosen at
!! compile time.
!! The backend receives the six caller pointers, the nine metric pointers
!! of `coef`, `Xh%w3_d`, `coef%msh%nelv` and `Xh%lx`.
!! @param cr_d, cs_d, ct_d Device pointers passed first to the backend
!! (presumably the r/s/t outputs; confirm against the kernel).
!! @param cx_d, cy_d, cz_d Device pointers passed next to the backend.
!! @param Xh Function space supplying `w3_d` and `lx`.
!! @param coef Coefficients supplying the mesh and metric pointers.
subroutine opr_device_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
     Xh, coef)
  type(space_t), intent(inout) :: Xh
  type(coef_t), intent(inout) :: coef
  type(c_ptr), intent(inout) :: cr_d, cs_d, ct_d, cx_d, cy_d, cz_d

  associate(msh => coef%msh)
#ifdef HAVE_HIP
    call hip_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
         coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
         coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
         coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
         Xh%w3_d, msh%nelv, Xh%lx)
#elif HAVE_CUDA
    call cuda_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
         coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
         coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
         coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
         Xh%w3_d, msh%nelv, Xh%lx)
#elif HAVE_OPENCL
    call opencl_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
         coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
         coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
         coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
         Xh%w3_d, msh%nelv, Xh%lx)
#else
    ! Built without HIP, CUDA or OpenCL support.
    call neko_error('No device backend configured')
#endif
  end associate

end subroutine opr_device_set_convect_rst
917
918end module opr_device
Return the device pointer for an associated Fortran array.
Definition device.F90:107
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Coefficients.
Definition coef.f90:34
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction $a = b - c$.
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector $a = b$.
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication $a = a \cdot b$.
subroutine, public device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1515
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:225
Defines a field.
Definition field.f90:34
Gather-scatter.
Routines to interpolate between different spaces.
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators accelerator backends.
subroutine, public opr_device_convect_scalar(du, u_d, cr_d, cs_d, ct_d, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
subroutine, public opr_device_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, coef)
subroutine, public opr_device_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, coef)
real(kind=rp) function, public opr_device_cfl(dt, u_d, v_d, w_d, xh, coef, nelv, gdim)
subroutine, public opr_device_conv1(du_d, u_d, vx_d, vy_d, vz_d, xh, coef, nelv, gdim)
subroutine, public device_ortho(x_d, glb_n_points, n)
Orthogonalize with regard to vector (1,1,1,1,1,1...,1)^T.
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh, event)
subroutine, public opr_device_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, xh, coef)
subroutine, public opr_device_lambda2(lambda2_d, u_d, v_d, w_d, coef)
subroutine, public opr_device_rotate_cyc(vx_d, vy_d, vz_d, idir, coef)
subroutine, public opr_device_opgrad(ux_d, uy_d, uz_d, u_d, coef)
Defines a function space.
Definition space.f90:34
Utilities.
Definition utils.f90:35
void opencl_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
Definition opr_cdtp.c:57
void cuda_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
Definition opr_cdtp.cu:57
real opencl_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
Definition opr_cfl.c:54
real cuda_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
Definition opr_cfl.cu:64
void opencl_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
Definition opr_conv1.c:57
void cuda_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
Definition opr_conv1.cu:60
void opencl_convect_scalar(void *du, void *u, void *cr, void *cs, void *ct, void *dx, void *dy, void *dz, int *nel, int *lx)
void cuda_convect_scalar(void *du, void *u, void *cr, void *cs, void *ct, void *dx, void *dy, void *dz, int *nel, int *lx)
void opencl_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
Definition opr_dudxyz.c:57
void cuda_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
Definition opr_dudxyz.cu:57
void opencl_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
Definition opr_lambda2.c:53
void cuda_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
void opencl_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Definition opr_opgrad.c:57
void cuda_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Definition opr_opgrad.cu:59
void opencl_rotate_cyc(void *vx, void *vy, void *vz, void *x, void *y, void *z, void *cyc_msk, void *R11, void *R12, int *ncyc, int *idir)
void cuda_rotate_cyc(void *vx, void *vy, void *vz, void *x, void *y, void *z, void *cyc_msk, void *R11, void *R12, int *ncyc, int *idir)
void opencl_set_convect_rst(void *cr, void *cs, void *ct, void *cx, void *cy, void *cz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
void cuda_set_convect_rst(void *cr, void *cs, void *ct, void *cx, void *cy, void *cz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Coefficients defined on a given (mesh, \( X_h \)) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:63
Interpolation between two space::space_t.
The function space for the SEM solution fields.
Definition space.f90:63