Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
opr_device.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2026, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use gather_scatter, only : gs_op_add
36 use num_types, only : rp, c_rp, i8
38 use space, only : space_t
39 use coefs, only : coef_t
40 use field, only : field_t
41 use utils, only : neko_error
46 use, intrinsic :: iso_c_binding
47 implicit none
48 private
49
54
55#ifdef HAVE_HIP
56 interface
57 subroutine hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
58 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
59 bind(c, name = 'hip_dudxyz')
60 use, intrinsic :: iso_c_binding
61 type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
62 type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
63 integer(c_int) :: nel, lx
64 end subroutine hip_dudxyz
65 end interface
66
67 interface
68 subroutine hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
69 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
70 bind(c, name = 'hip_cdtp')
71 use, intrinsic :: iso_c_binding
72 type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
73 type(c_ptr), value :: dxt_d, dyt_d, dzt_d, w3_d
74 integer(c_int) :: nel, lx
75 end subroutine hip_cdtp
76 end interface
77
78 interface
79 subroutine hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
80 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
81 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
82 jacinv_d, nel, gdim, lx) &
83 bind(c, name = 'hip_conv1')
84 use, intrinsic :: iso_c_binding
85 type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
86 type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
87 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
88 type(c_ptr), value :: jacinv_d
89 integer(c_int) :: nel, gdim, lx
90 end subroutine hip_conv1
91 end interface
92
93 interface
94 subroutine hip_convect_scalar(du_d, u_d, cr_d, cs_d, ct_d, &
95 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'hip_convect_scalar')
96 use, intrinsic :: iso_c_binding
97 type(c_ptr), value :: du_d, u_d
98 type(c_ptr), value :: cr_d, cs_d, ct_d
99 type(c_ptr), value :: dx_d, dy_d, dz_d
100 integer(c_int) :: nel, lx
101 end subroutine hip_convect_scalar
102 end interface
103
104 interface
105 subroutine hip_opgrad(ux_d, uy_d, uz_d, u_d, &
106 dx_d, dy_d, dz_d, &
107 drdx_d, dsdx_d, dtdx_d, &
108 drdy_d, dsdy_d, dtdy_d, &
109 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
110 bind(c, name = 'hip_opgrad')
111 use, intrinsic :: iso_c_binding
112 type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
113 type(c_ptr), value :: dx_d, dy_d, dz_d
114 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
115 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
116 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
117 type(c_ptr), value :: w3_d
118 integer(c_int) :: nel, lx
119 end subroutine hip_opgrad
120 end interface
121
122 interface
123 subroutine hip_lambda2(lambda2_d, u_d, v_d, w_d, &
124 dx_d, dy_d, dz_d, &
125 drdx_d, dsdx_d, dtdx_d, &
126 drdy_d, dsdy_d, dtdy_d, &
127 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
128 bind(c, name = 'hip_lambda2')
129 use, intrinsic :: iso_c_binding
130 type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
131 type(c_ptr), value :: dx_d, dy_d, dz_d
132 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
133 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
134 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
135 type(c_ptr), value :: jacinv_d
136 integer(c_int) :: nel, lx
137 end subroutine hip_lambda2
138 end interface
139
140 interface
141 real(c_rp) function hip_cfl(dt, u_d, v_d, w_d, &
142 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
143 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
144 jacinv_d, nel, lx) &
145 bind(c, name = 'hip_cfl')
146 use, intrinsic :: iso_c_binding
147 import c_rp
148 type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
149 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
150 type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
151 real(c_rp) :: dt
152 integer(c_int) :: nel, lx
153 end function hip_cfl
154 end interface
155
156 interface
157 subroutine hip_rotate_cyc(vx_d, vy_d, vz_d, &
158 x_d, y_d, z_d, &
159 cyc_msk_d, R11_d, R12_d, ncyc, idir) &
160 bind(c, name = 'hip_rotate_cyc')
161 use, intrinsic :: iso_c_binding
162 type(c_ptr), value :: vx_d, vy_d, vz_d
163 type(c_ptr), value :: x_d, y_d, z_d
164 type(c_ptr), value :: cyc_msk_d, R11_d, R12_d
165 integer(c_int) :: ncyc, idir
166 end subroutine hip_rotate_cyc
167 end interface
168
169 interface
170 subroutine hip_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
171 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
172 dtdz_d, w3_d, nel, lx) bind(c, name = 'hip_set_convect_rst')
173 use, intrinsic :: iso_c_binding
174 type(c_ptr), value :: cr_d, cs_d, ct_d
175 type(c_ptr), value :: cx_d, cy_d, cz_d
176 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
177 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
178 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
179 type(c_ptr), value :: w3_d
180 integer(c_int) :: nel, lx
181 end subroutine hip_set_convect_rst
182 end interface
183
184#elif HAVE_CUDA
185 interface
186 subroutine cuda_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
187 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
188 bind(c, name = 'cuda_dudxyz')
189 use, intrinsic :: iso_c_binding
190 type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
191 type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
192 integer(c_int) :: nel, lx
193 end subroutine cuda_dudxyz
194 end interface
195
196 interface
197 subroutine cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
198 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
199 bind(c, name = 'cuda_cdtp')
200 use, intrinsic :: iso_c_binding
201 type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
202 type(c_ptr), value :: dxt_d, dyt_d, dzt_d, w3_d
203 integer(c_int) :: nel, lx
204 end subroutine cuda_cdtp
205 end interface
206
207 interface
208 subroutine cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
209 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
210 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
211 jacinv_d, nel, gdim, lx) &
212 bind(c, name = 'cuda_conv1')
213 use, intrinsic :: iso_c_binding
214 type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
215 type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
216 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
217 type(c_ptr), value :: jacinv_d
218 integer(c_int) :: nel, gdim, lx
219 end subroutine cuda_conv1
220 end interface
221
222 interface
223 subroutine cuda_convect_scalar(du_d, u_d, cr_d, cs_d, ct_d, &
224 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'cuda_convect_scalar')
225 use, intrinsic :: iso_c_binding
226 type(c_ptr), value :: du_d, u_d
227 type(c_ptr), value :: cr_d, cs_d, ct_d
228 type(c_ptr), value :: dx_d, dy_d, dz_d
229 integer(c_int) :: nel, lx
230 end subroutine cuda_convect_scalar
231 end interface
232
233 interface
234 subroutine cuda_opgrad(ux_d, uy_d, uz_d, u_d, &
235 dx_d, dy_d, dz_d, &
236 drdx_d, dsdx_d, dtdx_d, &
237 drdy_d, dsdy_d, dtdy_d, &
238 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
239 bind(c, name = 'cuda_opgrad')
240 use, intrinsic :: iso_c_binding
241 type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
242 type(c_ptr), value :: dx_d, dy_d, dz_d
243 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
244 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
245 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
246 type(c_ptr), value :: w3_d
247 integer(c_int) :: nel, lx
248 end subroutine cuda_opgrad
249 end interface
250
251 interface
252 subroutine cuda_lambda2(lambda2_d, u_d, v_d, w_d, &
253 dx_d, dy_d, dz_d, &
254 drdx_d, dsdx_d, dtdx_d, &
255 drdy_d, dsdy_d, dtdy_d, &
256 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
257 bind(c, name = 'cuda_lambda2')
258 use, intrinsic :: iso_c_binding
259 type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
260 type(c_ptr), value :: dx_d, dy_d, dz_d
261 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
262 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
263 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
264 type(c_ptr), value :: jacinv_d
265 integer(c_int) :: nel, lx
266 end subroutine cuda_lambda2
267 end interface
268
269 interface
270 real(c_rp) function cuda_cfl(dt, u_d, v_d, w_d, &
271 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
272 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
273 jacinv_d, nel, lx) &
274 bind(c, name = 'cuda_cfl')
275 use, intrinsic :: iso_c_binding
276 import c_rp
277 type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
278 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
279 type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
280 real(c_rp) :: dt
281 integer(c_int) :: nel, lx
282 end function cuda_cfl
283 end interface
284
285 interface
286 subroutine cuda_rotate_cyc(vx_d, vy_d, vz_d, &
287 x_d, y_d, z_d, &
288 cyc_msk_d, R11_d, R12_d, ncyc, idir) &
289 bind(c, name = 'cuda_rotate_cyc')
290 use, intrinsic :: iso_c_binding
291 type(c_ptr), value :: vx_d, vy_d, vz_d
292 type(c_ptr), value :: x_d, y_d, z_d
293 type(c_ptr), value :: cyc_msk_d, R11_d, R12_d
294 integer(c_int) :: ncyc, idir
295 end subroutine cuda_rotate_cyc
296 end interface
297
298 interface
299 subroutine cuda_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
300 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
301 dtdz_d, w3_d, nel, lx) bind(c, name = 'cuda_set_convect_rst')
302 use, intrinsic :: iso_c_binding
303 type(c_ptr), value :: cr_d, cs_d, ct_d
304 type(c_ptr), value :: cx_d, cy_d, cz_d
305 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
306 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
307 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
308 type(c_ptr), value :: w3_d
309 integer(c_int) :: nel, lx
310 end subroutine cuda_set_convect_rst
311 end interface
312
313#elif HAVE_OPENCL
314 interface
315 subroutine opencl_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
316 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
317 bind(c, name = 'opencl_dudxyz')
318 use, intrinsic :: iso_c_binding
319 type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
320 type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
321 integer(c_int) :: nel, lx
322 end subroutine opencl_dudxyz
323 end interface
324
325 interface
326 subroutine opencl_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
327 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
328 bind(c, name = 'opencl_cdtp')
329 use, intrinsic :: iso_c_binding
330 type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
331 type(c_ptr), value :: dxt_d, dyt_d, dzt_d, w3_d
332 integer(c_int) :: nel, lx
333 end subroutine opencl_cdtp
334 end interface
335
336 interface
337 subroutine opencl_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
338 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
339 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
340 jacinv_d, nel, gdim, lx) &
341 bind(c, name = 'opencl_conv1')
342 use, intrinsic :: iso_c_binding
343 type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
344 type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
345 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
346 type(c_ptr), value :: jacinv_d
347 integer(c_int) :: nel, gdim, lx
348 end subroutine opencl_conv1
349 end interface
350
351 interface
352 subroutine opencl_convect_scalar(du_d, u_d, cr_d, cs_d, ct_d, &
353 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'opencl_convect_scalar')
354 use, intrinsic :: iso_c_binding
355 type(c_ptr), value :: du_d, u_d
356 type(c_ptr), value :: cr_d, cs_d, ct_d
357 type(c_ptr), value :: dx_d, dy_d, dz_d
358 integer(c_int) :: nel, lx
359 end subroutine opencl_convect_scalar
360 end interface
361
362 interface
363 subroutine opencl_opgrad(ux_d, uy_d, uz_d, u_d, &
364 dx_d, dy_d, dz_d, &
365 drdx_d, dsdx_d, dtdx_d, &
366 drdy_d, dsdy_d, dtdy_d, &
367 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
368 bind(c, name = 'opencl_opgrad')
369 use, intrinsic :: iso_c_binding
370 type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
371 type(c_ptr), value :: dx_d, dy_d, dz_d
372 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
373 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
374 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
375 type(c_ptr), value :: w3_d
376 integer(c_int) :: nel, lx
377 end subroutine opencl_opgrad
378 end interface
379
380 interface
381 real(c_rp) function opencl_cfl(dt, u_d, v_d, w_d, &
382 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
383 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
384 jacinv_d, nel, lx) &
385 bind(c, name = 'opencl_cfl')
386 use, intrinsic :: iso_c_binding
387 import c_rp
388 type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
389 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
390 type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
391 real(c_rp) :: dt
392 integer(c_int) :: nel, lx
393 end function opencl_cfl
394 end interface
395
396 interface
397 subroutine opencl_lambda2(lambda2_d, u_d, v_d, w_d, &
398 dx_d, dy_d, dz_d, &
399 drdx_d, dsdx_d, dtdx_d, &
400 drdy_d, dsdy_d, dtdy_d, &
401 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
402 bind(c, name = 'opencl_lambda2')
403 use, intrinsic :: iso_c_binding
404 type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
405 type(c_ptr), value :: dx_d, dy_d, dz_d
406 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
407 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
408 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
409 type(c_ptr), value :: jacinv_d
410 integer(c_int) :: nel, lx
411 end subroutine opencl_lambda2
412 end interface
413
414 interface
415 subroutine opencl_rotate_cyc(vx_d, vy_d, vz_d, &
416 x_d, y_d, z_d, &
417 cyc_msk_d, R11_d, R12_d, ncyc, idir) &
418 bind(c, name = 'opencl_rotate_cyc')
419 use, intrinsic :: iso_c_binding
420 type(c_ptr), value :: vx_d, vy_d, vz_d
421 type(c_ptr), value :: x_d, y_d, z_d
422 type(c_ptr), value :: cyc_msk_d, R11_d, R12_d
423 integer(c_int) :: ncyc, idir
424 end subroutine opencl_rotate_cyc
425 end interface
426
427 interface
428 subroutine opencl_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
429 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
430 dtdz_d, w3_d, nel, lx) bind(c, name = 'opencl_set_convect_rst')
431 use, intrinsic :: iso_c_binding
432 type(c_ptr), value :: cr_d, cs_d, ct_d
433 type(c_ptr), value :: cx_d, cy_d, cz_d
434 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
435 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
436 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
437 type(c_ptr), value :: w3_d
438 integer(c_int) :: nel, lx
439 end subroutine opencl_set_convect_rst
440 end interface
441
442#elif HAVE_METAL
443 interface
444 subroutine metal_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
445 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
446 bind(c, name = 'metal_dudxyz')
447 use, intrinsic :: iso_c_binding
448 type(c_ptr), value :: du_d, u_d, dr_d, ds_d, dt_d
449 type(c_ptr), value :: dx_d, dy_d, dz_d, jacinv_d
450 integer(c_int) :: nel, lx
451 end subroutine metal_dudxyz
452 end interface
453
454 interface
455 subroutine metal_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
456 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
457 bind(c, name = 'metal_cdtp')
458 use, intrinsic :: iso_c_binding
459 type(c_ptr), value :: dtx_d, x_d, dr_d, ds_d, dt_d
460 type(c_ptr), value :: dxt_d, dyt_d, dzt_d, w3_d
461 integer(c_int) :: nel, lx
462 end subroutine metal_cdtp
463 end interface
464
465 interface
466 subroutine metal_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
467 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
468 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
469 jacinv_d, nel, gdim, lx) &
470 bind(c, name = 'metal_conv1')
471 use, intrinsic :: iso_c_binding
472 type(c_ptr), value :: du_d, u_d, vx_d, vy_d, vz_d
473 type(c_ptr), value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
474 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
475 type(c_ptr), value :: jacinv_d
476 integer(c_int) :: nel, gdim, lx
477 end subroutine metal_conv1
478 end interface
479
480 interface
481 subroutine metal_convect_scalar(du_d, u_d, cr_d, cs_d, ct_d, &
482 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'metal_convect_scalar')
483 use, intrinsic :: iso_c_binding
484 type(c_ptr), value :: du_d, u_d
485 type(c_ptr), value :: cr_d, cs_d, ct_d
486 type(c_ptr), value :: dx_d, dy_d, dz_d
487 integer(c_int) :: nel, lx
488 end subroutine metal_convect_scalar
489 end interface
490
491 interface
492 subroutine metal_opgrad(ux_d, uy_d, uz_d, u_d, &
493 dx_d, dy_d, dz_d, &
494 drdx_d, dsdx_d, dtdx_d, &
495 drdy_d, dsdy_d, dtdy_d, &
496 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
497 bind(c, name = 'metal_opgrad')
498 use, intrinsic :: iso_c_binding
499 type(c_ptr), value :: ux_d, uy_d, uz_d, u_d
500 type(c_ptr), value :: dx_d, dy_d, dz_d
501 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
502 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
503 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
504 type(c_ptr), value :: w3_d
505 integer(c_int) :: nel, lx
506 end subroutine metal_opgrad
507 end interface
508
509 interface
510 real(c_rp) function metal_cfl(dt, u_d, v_d, w_d, &
511 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
512 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
513 jacinv_d, nel, lx) &
514 bind(c, name = 'metal_cfl')
515 use, intrinsic :: iso_c_binding
516 import c_rp
517 type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
518 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
519 type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
520 real(c_rp) :: dt
521 integer(c_int) :: nel, lx
522 end function metal_cfl
523 end interface
524
525 interface
526 subroutine metal_lambda2(lambda2_d, u_d, v_d, w_d, &
527 dx_d, dy_d, dz_d, &
528 drdx_d, dsdx_d, dtdx_d, &
529 drdy_d, dsdy_d, dtdy_d, &
530 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
531 bind(c, name = 'metal_lambda2')
532 use, intrinsic :: iso_c_binding
533 type(c_ptr), value :: lambda2_d, u_d, v_d, w_d
534 type(c_ptr), value :: dx_d, dy_d, dz_d
535 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
536 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
537 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
538 type(c_ptr), value :: jacinv_d
539 integer(c_int) :: nel, lx
540 end subroutine metal_lambda2
541 end interface
542
543 interface
544 subroutine metal_rotate_cyc(vx_d, vy_d, vz_d, &
545 x_d, y_d, z_d, &
546 cyc_msk_d, R11_d, R12_d, ncyc, idir) &
547 bind(c, name = 'metal_rotate_cyc')
548 use, intrinsic :: iso_c_binding
549 type(c_ptr), value :: vx_d, vy_d, vz_d
550 type(c_ptr), value :: x_d, y_d, z_d
551 type(c_ptr), value :: cyc_msk_d, R11_d, R12_d
552 integer(c_int) :: ncyc, idir
553 end subroutine metal_rotate_cyc
554 end interface
555
556 interface
557 subroutine metal_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
558 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
559 dtdz_d, w3_d, nel, lx) bind(c, name = 'metal_set_convect_rst')
560 use, intrinsic :: iso_c_binding
561 type(c_ptr), value :: cr_d, cs_d, ct_d
562 type(c_ptr), value :: cx_d, cy_d, cz_d
563 type(c_ptr), value :: drdx_d, dsdx_d, dtdx_d
564 type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d
565 type(c_ptr), value :: drdz_d, dsdz_d, dtdz_d
566 type(c_ptr), value :: w3_d
567 integer(c_int) :: nel, lx
568 end subroutine metal_set_convect_rst
569 end interface
570
571#endif
572
573contains
574
575 subroutine opr_device_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, coef)
576 type(coef_t), intent(in), target :: coef
577 type(c_ptr), intent(inout) :: du_d
578 type(c_ptr), intent(in) :: u_d, dr_d, ds_d, dt_d
579
580 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
581#ifdef HAVE_HIP
582 call hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
583 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
584 msh%nelv, xh%lx)
585#elif HAVE_CUDA
586 call cuda_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
587 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
588 msh%nelv, xh%lx)
589#elif HAVE_OPENCL
590 call opencl_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
591 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
592 msh%nelv, xh%lx)
593#elif HAVE_METAL
594 call metal_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
595 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
596 msh%nelv, xh%lx)
597#else
598 call neko_error('No device backend configured')
599#endif
600 end associate
601
602 end subroutine opr_device_dudxyz
603
604 subroutine opr_device_opgrad(ux_d, uy_d, uz_d, u_d, coef)
605 type(coef_t), intent(in), target :: coef
606 type(c_ptr), intent(inout) :: ux_d, uy_d, uz_d
607 type(c_ptr), intent(in) :: u_d
608
609 associate(xh => coef%Xh, msh => coef%msh)
610#ifdef HAVE_HIP
611 call hip_opgrad(ux_d, uy_d, uz_d, u_d, &
612 xh%dx_d, xh%dy_d, xh%dz_d, &
613 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
614 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
615 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
616 xh%w3_d, msh%nelv, xh%lx)
617#elif HAVE_CUDA
618 call cuda_opgrad(ux_d, uy_d, uz_d, u_d, &
619 xh%dx_d, xh%dy_d, xh%dz_d, &
620 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
621 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
622 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
623 xh%w3_d, msh%nelv, xh%lx)
624#elif HAVE_OPENCL
625 call opencl_opgrad(ux_d, uy_d, uz_d, u_d, &
626 xh%dx_d, xh%dy_d, xh%dz_d, &
627 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
628 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
629 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
630 xh%w3_d, msh%nelv, xh%lx)
631#elif HAVE_METAL
632 call metal_opgrad(ux_d, uy_d, uz_d, u_d, &
633 xh%dx_d, xh%dy_d, xh%dz_d, &
634 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
635 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
636 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
637 xh%w3_d, msh%nelv, xh%lx)
638#else
639 call neko_error('No device backend configured')
640#endif
641 end associate
642
643 end subroutine opr_device_opgrad
644
650 subroutine device_ortho(x_d, glb_n_points, n)
651 integer, intent(in) :: n
652 integer(kind=i8), intent(in) :: glb_n_points
653 type(c_ptr), intent(inout) :: x_d
654 real(kind=rp) :: c
655
656 c = device_glsum(x_d, n) / glb_n_points
657 call device_cadd(x_d, -c, n)
658
659 end subroutine device_ortho
660
661 subroutine opr_device_lambda2(lambda2_d, u_d, v_d, w_d, coef)
662 type(coef_t), intent(in) :: coef
663 type(c_ptr), intent(inout) :: lambda2_d
664 type(c_ptr), intent(in) :: u_d, v_d, w_d
665#ifdef HAVE_HIP
666 call hip_lambda2(lambda2_d, u_d, v_d, w_d, &
667 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
668 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
669 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
670 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
671 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
672#elif HAVE_CUDA
673 call cuda_lambda2(lambda2_d, u_d, v_d, w_d, &
674 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
675 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
676 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
677 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
678 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
679#elif HAVE_OPENCL
680 call opencl_lambda2(lambda2_d, u_d, v_d, w_d, &
681 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
682 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
683 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
684 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
685 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
686#elif HAVE_METAL
687 call metal_lambda2(lambda2_d, u_d, v_d, w_d, &
688 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
689 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
690 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
691 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
692 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
693#else
694 call neko_error('No device backend configured')
695#endif
696 end subroutine opr_device_lambda2
697
698 subroutine opr_device_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, coef)
699 type(coef_t), intent(in), target :: coef
700 type(c_ptr), intent(inout) :: dtx_d, x_d
701 type(c_ptr), intent(in) :: dr_d, ds_d, dt_d
702
703 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
704#ifdef HAVE_HIP
705 call hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
706 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
707 msh%nelv, xh%lx)
708#elif HAVE_CUDA
709 call cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
710 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
711 msh%nelv, xh%lx)
712#elif HAVE_OPENCL
713 call opencl_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
714 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
715 msh%nelv, xh%lx)
716#elif HAVE_METAL
717 call metal_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
718 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
719 msh%nelv, xh%lx)
720#else
721 call neko_error('No device backend configured')
722#endif
723 end associate
724
725 end subroutine opr_device_cdtp
726
727 subroutine opr_device_conv1(du_d, u_d, vx_d, vy_d, vz_d, Xh, coef, nelv, gdim)
728 type(space_t), intent(in) :: xh
729 type(coef_t), intent(in), target :: coef
730 integer, intent(in) :: nelv, gdim
731 type(c_ptr), intent(inout) :: du_d
732 type(c_ptr), intent(in) :: u_d, vx_d, vy_d, vz_d
733
734 associate(msh => coef%msh, dof => coef%dof)
735#ifdef HAVE_HIP
736 call hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
737 xh%dx_d, xh%dy_d, xh%dz_d, &
738 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
739 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
740 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
741 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
742#elif HAVE_CUDA
743 call cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
744 xh%dx_d, xh%dy_d, xh%dz_d, &
745 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
746 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
747 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
748 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
749#elif HAVE_OPENCL
750 call opencl_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
751 xh%dx_d, xh%dy_d, xh%dz_d, &
752 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
753 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
754 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
755 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
756#elif HAVE_METAL
757 call metal_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
758 xh%dx_d, xh%dy_d, xh%dz_d, &
759 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
760 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
761 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
762 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
763#else
764 call neko_error('No device backend configured')
765#endif
766 end associate
767
768 end subroutine opr_device_conv1
769
770 subroutine opr_device_convect_scalar(du, u_d, cr_d, cs_d, ct_d, &
771 Xh_GLL, Xh_GL, coef_GLL, coef_GL, GLL_to_GL)
772 type(space_t), intent(in) :: xh_gl
773 type(space_t), intent(in) :: xh_gll
774 type(coef_t), intent(in) :: coef_gll
775 type(coef_t), intent(in) :: coef_gl
776 type(interpolator_t), intent(inout) :: gll_to_gl
777 real(kind=rp), intent(inout) :: &
778 du(xh_gll%lx, xh_gll%ly, xh_gll%lz, coef_gl%msh%nelv)
779 type(c_ptr) :: cr_d, cs_d, ct_d, u_d
780 real(kind=rp) :: ud(xh_gl%lx*xh_gl%lx*xh_gl%lx)
781 type(c_ptr) :: du_d, ud_d
782 integer :: n_gl, n_gll
783
784 n_gll = coef_gl%msh%nelv * xh_gl%lxyz
785 n_gll = coef_gl%msh%nelv * xh_gll%lxyz
786
787 call device_map(ud, ud_d, n_gl)
788
789 du_d = device_get_ptr(du)
790
791 associate(xh => xh_gl, nelv => coef_gl%msh%nelv, lx => xh_gl%lx)
792#ifdef HAVE_HIP
793 call hip_convect_scalar(ud_d, u_d, cr_d, cs_d, ct_d, &
794 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
795#elif HAVE_CUDA
796 call cuda_convect_scalar(ud_d, u_d, cr_d, cs_d, ct_d, &
797 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
798#elif HAVE_OPENCL
799 call opencl_convect_scalar(ud_d, u_d, cr_d, cs_d, ct_d, &
800 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
801#elif HAVE_METAL
802 call metal_convect_scalar(ud_d, u_d, cr_d, cs_d, ct_d, &
803 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
804#else
805 call neko_error('No device backend configured')
806#endif
807
808 call gll_to_gl%map(du, ud, nelv, xh_gll)
809 call coef_gll%gs_h%op(du, n_gll, gs_op_add)
810 call device_col2(du_d, coef_gll%Binv_d, n_gll)
811
812 end associate
813
814 call device_unmap(ud, ud_d)
815
816 end subroutine opr_device_convect_scalar
817
818 subroutine opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh, event)
819 type(field_t), intent(inout) :: w1
820 type(field_t), intent(inout) :: w2
821 type(field_t), intent(inout) :: w3
822 type(field_t), intent(in) :: u1
823 type(field_t), intent(in) :: u2
824 type(field_t), intent(in) :: u3
825 type(field_t), intent(inout) :: work1
826 type(field_t), intent(inout) :: work2
827 type(coef_t), intent(in) :: c_xh
828 type(c_ptr), optional, intent(inout) :: event
829 integer :: gdim, n, nelv
830
831 n = w1%dof%size()
832 gdim = c_xh%msh%gdim
833 nelv = c_xh%msh%nelv
834
835 ! this%work1=dw/dy ; this%work2=dv/dz
836#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL) || defined(HAVE_METAL)
837#ifdef HAVE_HIP
838 call hip_dudxyz(work1%x_d, u3%x_d, &
839 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
840 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
841 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
842#elif HAVE_CUDA
843 call cuda_dudxyz(work1%x_d, u3%x_d, &
844 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
845 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
846 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
847#elif HAVE_OPENCL
848 call opencl_dudxyz(work1%x_d, u3%x_d, &
849 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
850 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
851 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
852#elif HAVE_METAL
853 call metal_dudxyz(work1%x_d, u3%x_d, &
854 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
855 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
856 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
857#endif
858 if (gdim .eq. 3) then
859#ifdef HAVE_HIP
860 call hip_dudxyz(work2%x_d, u2%x_d, &
861 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
862 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
863 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
864#elif HAVE_CUDA
865 call cuda_dudxyz(work2%x_d, u2%x_d, &
866 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
867 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
868 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
869#elif HAVE_OPENCL
870 call opencl_dudxyz(work2%x_d, u2%x_d, &
871 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
872 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
873 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
874#elif HAVE_METAL
875 call metal_dudxyz(work2%x_d, u2%x_d, &
876 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
877 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
878 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
879#endif
880 call device_sub3(w1%x_d, work1%x_d, work2%x_d, n)
881 else
882 call device_copy(w1%x_d, work1%x_d, n)
883 endif
884 ! this%work1=du/dz ; this%work2=dw/dx
885 if (gdim .eq. 3) then
886#ifdef HAVE_HIP
887 call hip_dudxyz(work1%x_d, u1%x_d, &
888 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
889 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
890 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
891 call hip_dudxyz(work2%x_d, u3%x_d, &
892 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
893 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
894 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
895#elif HAVE_CUDA
896 call cuda_dudxyz(work1%x_d, u1%x_d, &
897 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
898 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
899 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
900 call cuda_dudxyz(work2%x_d, u3%x_d, &
901 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
902 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
903 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
904#elif HAVE_OPENCL
905 call opencl_dudxyz(work1%x_d, u1%x_d, &
906 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
907 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
908 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
909 call opencl_dudxyz(work2%x_d, u3%x_d, &
910 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
911 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
912 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
913#elif HAVE_METAL
914 call metal_dudxyz(work1%x_d, u1%x_d, &
915 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
916 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
917 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
918 call metal_dudxyz(work2%x_d, u3%x_d, &
919 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
920 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
921 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
922#endif
923 call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
924 else
925 call device_rzero (work1%x_d, n)
926#ifdef HAVE_HIP
927 call hip_dudxyz(work2%x_d, u3%x_d, &
928 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
929 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
930 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
931#elif HAVE_CUDA
932 call cuda_dudxyz(work2%x_d, u3%x_d, &
933 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
934 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
935 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
936#elif HAVE_OPENCL
937 call opencl_dudxyz(work2%x_d, u3%x_d, &
938 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
939 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
940 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
941#elif HAVE_METAL
942 call metal_dudxyz(work2%x_d, u3%x_d, &
943 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
944 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
945 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
946#endif
947 call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
948 endif
949 ! this%work1=dv/dx ; this%work2=du/dy
950#ifdef HAVE_HIP
951 call hip_dudxyz(work1%x_d, u2%x_d, &
952 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
953 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
954 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
955 call hip_dudxyz(work2%x_d, u1%x_d, &
956 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
957 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
958 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
959#elif HAVE_CUDA
960 call cuda_dudxyz(work1%x_d, u2%x_d, &
961 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
962 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
963 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
964 call cuda_dudxyz(work2%x_d, u1%x_d, &
965 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
966 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
967 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
968#elif HAVE_OPENCL
969 call opencl_dudxyz(work1%x_d, u2%x_d, &
970 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
971 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
972 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
973 call opencl_dudxyz(work2%x_d, u1%x_d, &
974 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
975 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
976 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
977#elif HAVE_METAL
978 call metal_dudxyz(work1%x_d, u2%x_d, &
979 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
980 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
981 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
982 call metal_dudxyz(work2%x_d, u1%x_d, &
983 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
984 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
985 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
986#endif
987 call device_sub3(w3%x_d, work1%x_d, work2%x_d, n)
988 !! BC dependent, Needs to change if cyclic
989
990 call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%B_d, gdim, n)
991
992 if (present(event)) then
993 if(c_xh%cyclic) call opr_device_rotate_cyc(w1%x_d, w2%x_d, w3%x_d, 1, c_xh)
994 call c_xh%gs_h%op(w1, gs_op_add, event)
995 call device_event_sync(event)
996 call c_xh%gs_h%op(w2, gs_op_add, event)
997 call device_event_sync(event)
998 call c_xh%gs_h%op(w3, gs_op_add, event)
999 call device_event_sync(event)
1000 if(c_xh%cyclic) call opr_device_rotate_cyc(w1%x_d, w2%x_d, w3%x_d, 0, c_xh)
1001 else
1002 if(c_xh%cyclic) call opr_device_rotate_cyc(w1%x_d, w2%x_d, w3%x_d, 1, c_xh)
1003 call c_xh%gs_h%op(w1, gs_op_add)
1004 call c_xh%gs_h%op(w2, gs_op_add)
1005 call c_xh%gs_h%op(w3, gs_op_add)
1006 if(c_xh%cyclic) call opr_device_rotate_cyc(w1%x_d, w2%x_d, w3%x_d, 0, c_xh)
1007 end if
1008
1009 call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%Binv_d, gdim, n)
1010
1011#else
1012 call neko_error('No device backend configured')
1013#endif
1014
1015 end subroutine opr_device_curl
1016
1017 function opr_device_cfl(dt, u_d, v_d, w_d, Xh, coef, nelv, gdim) result(cfl)
1018 type(space_t) :: xh
1019 type(coef_t) :: coef
1020 integer :: nelv, gdim
1021 real(kind=rp) :: dt
1022 type(c_ptr), intent(in) :: u_d, v_d, w_d
1023 real(kind=rp) :: cfl
1024
1025#ifdef HAVE_HIP
1026 cfl = hip_cfl(dt, u_d, v_d, w_d, &
1027 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
1028 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
1029 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
1030 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
1031 coef%jacinv_d, nelv, xh%lx)
1032#elif HAVE_CUDA
1033 cfl = cuda_cfl(dt, u_d, v_d, w_d, &
1034 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
1035 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
1036 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
1037 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
1038 coef%jacinv_d, nelv, xh%lx)
1039#elif HAVE_OPENCL
1040 cfl = opencl_cfl(dt, u_d, v_d, w_d, &
1041 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
1042 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
1043 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
1044 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
1045 coef%jacinv_d, nelv, xh%lx)
1046#elif HAVE_METAL
1047 cfl = metal_cfl(dt, u_d, v_d, w_d, &
1048 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
1049 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
1050 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
1051 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
1052 coef%jacinv_d, nelv, xh%lx)
1053#else
1054 cfl = 0.0_rp
1055 call neko_error('No device backend configured')
1056#endif
1057 end function opr_device_cfl
1058
1059 subroutine opr_device_rotate_cyc(vx_d, vy_d, vz_d, idir, coef)
1060 type(c_ptr), intent(inout) :: vx_d, vy_d, vz_d
1061 integer, intent(in) :: idir
1062 type(coef_t), intent(in) :: coef
1063 integer :: ncyc
1064
1065 ncyc = coef%cyc_msk(0) - 1
1066
1067 if (ncyc .le. 0) return
1068
1069#ifdef HAVE_HIP
1070 call hip_rotate_cyc(vx_d, vy_d, vz_d, &
1071 coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
1072 coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
1073 ncyc, idir)
1074#elif HAVE_CUDA
1075 call cuda_rotate_cyc(vx_d, vy_d, vz_d, &
1076 coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
1077 coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
1078 ncyc, idir)
1079#elif HAVE_OPENCL
1080 call opencl_rotate_cyc(vx_d, vy_d, vz_d, &
1081 coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
1082 coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
1083 ncyc, idir)
1084#elif HAVE_METAL
1085 call metal_rotate_cyc(vx_d, vy_d, vz_d, &
1086 coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
1087 coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
1088 ncyc, idir)
1089#else
1090 call neko_error('No device backend configured for rotate_cyc')
1091#endif
1092 end subroutine opr_device_rotate_cyc
1093
1094 subroutine opr_device_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
1095 Xh, coef)
1096 type(space_t), intent(inout) :: xh
1097 type(coef_t), intent(inout) :: coef
1098 type(c_ptr), intent(inout) :: cr_d, cs_d, ct_d, cx_d, cy_d, cz_d
1099
1100#ifdef HAVE_HIP
1101 call hip_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
1102 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
1103 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
1104 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
1105 xh%w3_d, coef%msh%nelv, xh%lx)
1106#elif HAVE_CUDA
1107 call cuda_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
1108 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
1109 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
1110 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
1111 xh%w3_d, coef%msh%nelv, xh%lx)
1112#elif HAVE_OPENCL
1113 call opencl_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
1114 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
1115 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
1116 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
1117 xh%w3_d, coef%msh%nelv, xh%lx)
1118#elif HAVE_METAL
1119 call metal_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, &
1120 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
1121 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
1122 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
1123 xh%w3_d, coef%msh%nelv, xh%lx)
1124#else
1125 call neko_error('No device backend configured')
1126#endif
1127
1128 end subroutine opr_device_set_convect_rst
1129
1130end module opr_device
Return the device pointer for an associated Fortran array.
Definition device.F90:108
Map a Fortran array to a device (allocate and associate)
Definition device.F90:78
Unmap a Fortran array from a device (deassociate and free)
Definition device.F90:84
Coefficients.
Definition coef.f90:34
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction .
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1594
Defines a field.
Definition field.f90:34
Gather-scatter.
Routines to interpolate between different spaces.
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators accelerator backends.
subroutine, public opr_device_convect_scalar(du, u_d, cr_d, cs_d, ct_d, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
subroutine, public opr_device_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, coef)
subroutine, public opr_device_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, coef)
real(kind=rp) function, public opr_device_cfl(dt, u_d, v_d, w_d, xh, coef, nelv, gdim)
subroutine, public opr_device_conv1(du_d, u_d, vx_d, vy_d, vz_d, xh, coef, nelv, gdim)
subroutine, public device_ortho(x_d, glb_n_points, n)
Othogonalize with regard to vector (1,1,1,1,1,1...,1)^T.
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh, event)
subroutine, public opr_device_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, xh, coef)
subroutine, public opr_device_lambda2(lambda2_d, u_d, v_d, w_d, coef)
subroutine, public opr_device_rotate_cyc(vx_d, vy_d, vz_d, idir, coef)
subroutine, public opr_device_opgrad(ux_d, uy_d, uz_d, u_d, coef)
Defines a function space.
Definition space.f90:34
Utilities.
Definition utils.f90:35
void opencl_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
Definition opr_cdtp.c:57
void cuda_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
Definition opr_cdtp.cu:57
real opencl_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
Definition opr_cfl.c:54
real cuda_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
Definition opr_cfl.cu:64
void opencl_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
Definition opr_conv1.c:57
void cuda_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
Definition opr_conv1.cu:60
void opencl_convect_scalar(void *du, void *u, void *cr, void *cs, void *ct, void *dx, void *dy, void *dz, int *nel, int *lx)
void cuda_convect_scalar(void *du, void *u, void *cr, void *cs, void *ct, void *dx, void *dy, void *dz, int *nel, int *lx)
void opencl_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
Definition opr_dudxyz.c:57
void cuda_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
Definition opr_dudxyz.cu:57
void opencl_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
Definition opr_lambda2.c:53
void cuda_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
void opencl_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Definition opr_opgrad.c:57
void cuda_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Definition opr_opgrad.cu:59
void opencl_rotate_cyc(void *vx, void *vy, void *vz, void *x, void *y, void *z, void *cyc_msk, void *R11, void *R12, int *ncyc, int *idir)
void cuda_rotate_cyc(void *vx, void *vy, void *vz, void *x, void *y, void *z, void *cyc_msk, void *R11, void *R12, int *ncyc, int *idir)
void opencl_set_convect_rst(void *cr, void *cs, void *ct, void *cx, void *cy, void *cz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
void cuda_set_convect_rst(void *cr, void *cs, void *ct, void *cx, void *cy, void *cz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:63
Interpolation between two space::space_t.
The function space for the SEM solution fields.
Definition space.f90:63