45 use,
intrinsic :: iso_c_binding
57 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
58 bind(c, name =
'hip_dudxyz')
59 use,
intrinsic :: iso_c_binding
60 type(c_ptr),
value :: du_d, u_d, dr_d, ds_d, dt_d
61 type(c_ptr),
value :: dx_d, dy_d, dz_d, jacinv_d
62 integer(c_int) :: nel, lx
67 subroutine hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
68 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
69 bind(c, name =
'hip_cdtp')
70 use,
intrinsic :: iso_c_binding
71 type(c_ptr),
value :: dtx_d, x_d, dr_d, ds_d, dt_d
72 type(c_ptr),
value :: dxt_d, dyt_d, dzt_d, w3_d
73 integer(c_int) :: nel, lx
79 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
80 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
81 jacinv_d, nel, gdim, lx) &
82 bind(c, name =
'hip_conv1')
83 use,
intrinsic :: iso_c_binding
84 type(c_ptr),
value :: du_d, u_d, vx_d, vy_d, vz_d
85 type(c_ptr),
value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
86 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
87 type(c_ptr),
value :: jacinv_d
88 integer(c_int) :: nel, gdim, lx
94 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'hip_convect_scalar')
95 use,
intrinsic :: iso_c_binding
96 type(c_ptr),
value :: du_d, u_d
97 type(c_ptr),
value :: cr_d, cs_d, ct_d
98 type(c_ptr),
value :: dx_d, dy_d, dz_d
99 integer(c_int) :: nel, lx
106 drdx_d, dsdx_d, dtdx_d, &
107 drdy_d, dsdy_d, dtdy_d, &
108 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
109 bind(c, name =
'hip_opgrad')
110 use,
intrinsic :: iso_c_binding
111 type(c_ptr),
value :: ux_d, uy_d, uz_d, u_d
112 type(c_ptr),
value :: dx_d, dy_d, dz_d
113 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
114 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
115 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
116 type(c_ptr),
value :: w3_d
117 integer(c_int) :: nel, lx
124 drdx_d, dsdx_d, dtdx_d, &
125 drdy_d, dsdy_d, dtdy_d, &
126 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
127 bind(c, name =
'hip_lambda2')
128 use,
intrinsic :: iso_c_binding
129 type(c_ptr),
value :: lambda2_d, u_d, v_d, w_d
130 type(c_ptr),
value :: dx_d, dy_d, dz_d
131 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
132 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
133 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
134 type(c_ptr),
value :: jacinv_d
135 integer(c_int) :: nel, lx
141 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
142 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
144 bind(c, name =
'hip_cfl')
145 use,
intrinsic :: iso_c_binding
147 type(c_ptr),
value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
148 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
149 type(c_ptr),
value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
151 integer(c_int) :: nel, lx
158 cyc_msk_d, R11_d, R12_d, ncyc, idir) &
159 bind(c, name =
'hip_rotate_cyc')
160 use,
intrinsic :: iso_c_binding
161 type(c_ptr),
value :: vx_d, vy_d, vz_d
162 type(c_ptr),
value :: x_d, y_d, z_d
163 type(c_ptr),
value :: cyc_msk_d, R11_d, R12_d
164 integer(c_int) :: ncyc, idir
170 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
171 dtdz_d, w3_d, nel, lx) bind(c, name = 'hip_set_convect_rst')
172 use,
intrinsic :: iso_c_binding
173 type(c_ptr),
value :: cr_d, cs_d, ct_d
174 type(c_ptr),
value :: cx_d, cy_d, cz_d
175 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
176 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
177 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
178 type(c_ptr),
value :: w3_d
179 integer(c_int) :: nel, lx
185 subroutine cuda_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
186 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
187 bind(c, name =
'cuda_dudxyz')
188 use,
intrinsic :: iso_c_binding
189 type(c_ptr),
value :: du_d, u_d, dr_d, ds_d, dt_d
190 type(c_ptr),
value :: dx_d, dy_d, dz_d, jacinv_d
191 integer(c_int) :: nel, lx
196 subroutine cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
197 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
198 bind(c, name =
'cuda_cdtp')
199 use,
intrinsic :: iso_c_binding
200 type(c_ptr),
value :: dtx_d, x_d, dr_d, ds_d, dt_d
201 type(c_ptr),
value :: dxt_d, dyt_d, dzt_d, w3_d
202 integer(c_int) :: nel, lx
207 subroutine cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
208 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
209 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
210 jacinv_d, nel, gdim, lx) &
211 bind(c, name =
'cuda_conv1')
212 use,
intrinsic :: iso_c_binding
213 type(c_ptr),
value :: du_d, u_d, vx_d, vy_d, vz_d
214 type(c_ptr),
value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
215 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
216 type(c_ptr),
value :: jacinv_d
217 integer(c_int) :: nel, gdim, lx
223 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'cuda_convect_scalar')
224 use,
intrinsic :: iso_c_binding
225 type(c_ptr),
value :: du_d, u_d
226 type(c_ptr),
value :: cr_d, cs_d, ct_d
227 type(c_ptr),
value :: dx_d, dy_d, dz_d
228 integer(c_int) :: nel, lx
235 drdx_d, dsdx_d, dtdx_d, &
236 drdy_d, dsdy_d, dtdy_d, &
237 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
238 bind(c, name =
'cuda_opgrad')
239 use,
intrinsic :: iso_c_binding
240 type(c_ptr),
value :: ux_d, uy_d, uz_d, u_d
241 type(c_ptr),
value :: dx_d, dy_d, dz_d
242 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
243 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
244 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
245 type(c_ptr),
value :: w3_d
246 integer(c_int) :: nel, lx
253 drdx_d, dsdx_d, dtdx_d, &
254 drdy_d, dsdy_d, dtdy_d, &
255 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
256 bind(c, name =
'cuda_lambda2')
257 use,
intrinsic :: iso_c_binding
258 type(c_ptr),
value :: lambda2_d, u_d, v_d, w_d
259 type(c_ptr),
value :: dx_d, dy_d, dz_d
260 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
261 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
262 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
263 type(c_ptr),
value :: jacinv_d
264 integer(c_int) :: nel, lx
269 real(c_rp) function cuda_cfl(dt, u_d, v_d, w_d, &
270 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
271 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
273 bind(c, name =
'cuda_cfl')
274 use,
intrinsic :: iso_c_binding
276 type(c_ptr),
value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
277 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
278 type(c_ptr),
value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
280 integer(c_int) :: nel, lx
287 cyc_msk_d, R11_d, R12_d, ncyc, idir) &
288 bind(c, name =
'cuda_rotate_cyc')
289 use,
intrinsic :: iso_c_binding
290 type(c_ptr),
value :: vx_d, vy_d, vz_d
291 type(c_ptr),
value :: x_d, y_d, z_d
292 type(c_ptr),
value :: cyc_msk_d, R11_d, R12_d
293 integer(c_int) :: ncyc, idir
299 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
300 dtdz_d, w3_d, nel, lx) bind(c, name = 'cuda_set_convect_rst')
301 use,
intrinsic :: iso_c_binding
302 type(c_ptr),
value :: cr_d, cs_d, ct_d
303 type(c_ptr),
value :: cx_d, cy_d, cz_d
304 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
305 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
306 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
307 type(c_ptr),
value :: w3_d
308 integer(c_int) :: nel, lx
315 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
316 bind(c, name =
'opencl_dudxyz')
317 use,
intrinsic :: iso_c_binding
318 type(c_ptr),
value :: du_d, u_d, dr_d, ds_d, dt_d
319 type(c_ptr),
value :: dx_d, dy_d, dz_d, jacinv_d
320 integer(c_int) :: nel, lx
325 subroutine opencl_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
326 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
327 bind(c, name =
'opencl_cdtp')
328 use,
intrinsic :: iso_c_binding
329 type(c_ptr),
value :: dtx_d, x_d, dr_d, ds_d, dt_d
330 type(c_ptr),
value :: dxt_d, dyt_d, dzt_d, w3_d
331 integer(c_int) :: nel, lx
337 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
338 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
339 jacinv_d, nel, gdim, lx) &
340 bind(c, name =
'opencl_conv1')
341 use,
intrinsic :: iso_c_binding
342 type(c_ptr),
value :: du_d, u_d, vx_d, vy_d, vz_d
343 type(c_ptr),
value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
344 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
345 type(c_ptr),
value :: jacinv_d
346 integer(c_int) :: nel, gdim, lx
352 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'opencl_convect_scalar')
353 use,
intrinsic :: iso_c_binding
354 type(c_ptr),
value :: du_d, u_d
355 type(c_ptr),
value :: cr_d, cs_d, ct_d
356 type(c_ptr),
value :: dx_d, dy_d, dz_d
357 integer(c_int) :: nel, lx
364 drdx_d, dsdx_d, dtdx_d, &
365 drdy_d, dsdy_d, dtdy_d, &
366 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
367 bind(c, name =
'opencl_opgrad')
368 use,
intrinsic :: iso_c_binding
369 type(c_ptr),
value :: ux_d, uy_d, uz_d, u_d
370 type(c_ptr),
value :: dx_d, dy_d, dz_d
371 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
372 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
373 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
374 type(c_ptr),
value :: w3_d
375 integer(c_int) :: nel, lx
380 real(c_rp) function opencl_cfl(dt, u_d, v_d, w_d, &
381 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
382 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
384 bind(c, name =
'opencl_cfl')
385 use,
intrinsic :: iso_c_binding
387 type(c_ptr),
value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
388 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
389 type(c_ptr),
value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
391 integer(c_int) :: nel, lx
398 drdx_d, dsdx_d, dtdx_d, &
399 drdy_d, dsdy_d, dtdy_d, &
400 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
401 bind(c, name =
'opencl_lambda2')
402 use,
intrinsic :: iso_c_binding
403 type(c_ptr),
value :: lambda2_d, u_d, v_d, w_d
404 type(c_ptr),
value :: dx_d, dy_d, dz_d
405 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
406 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
407 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
408 type(c_ptr),
value :: jacinv_d
409 integer(c_int) :: nel, lx
415 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
416 dtdz_d, w3_d, nel, lx) bind(c, name = 'opencl_set_convect_rst')
417 use,
intrinsic :: iso_c_binding
418 type(c_ptr),
value :: cr_d, cs_d, ct_d
419 type(c_ptr),
value :: cx_d, cy_d, cz_d
420 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
421 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
422 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
423 type(c_ptr),
value :: w3_d
424 integer(c_int) :: nel, lx
433 type(
coef_t),
intent(in),
target :: coef
434 real(kind=
rp),
dimension(coef%Xh%lx, coef%Xh%ly, &
coef%Xh%lz, coef%msh%nelv),
intent(inout) :: du
435 real(kind=
rp),
dimension(coef%Xh%lx, coef%Xh%ly, &
coef%Xh%lz, coef%msh%nelv),
intent(in) :: u, dr, ds, dt
436 type(c_ptr) :: du_d, u_d, dr_d, ds_d, dt_d
445 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
447 call hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
448 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
452 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
456 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
459 call neko_error(
'No device backend configured')
466 type(
coef_t),
intent(in) :: coef
467 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: ux
468 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: uy
469 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: uz
470 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: u
471 type(c_ptr) :: ux_d, uy_d, uz_d, u_d
479 associate(xh => coef%Xh, msh => coef%msh)
482 xh%dx_d, xh%dy_d, xh%dz_d, &
483 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
484 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
485 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
486 xh%w3_d, msh%nelv, xh%lx)
489 xh%dx_d, xh%dy_d, xh%dz_d, &
490 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
491 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
492 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
493 xh%w3_d, msh%nelv, xh%lx)
496 xh%dx_d, xh%dy_d, xh%dz_d, &
497 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
498 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
499 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
500 xh%w3_d, msh%nelv, xh%lx)
502 call neko_error(
'No device backend configured')
508 type(
coef_t),
intent(in) :: coef
510 type(
field_t),
intent(in) :: u, v, w
513 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
514 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
515 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
516 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
517 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
520 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
521 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
522 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
523 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
524 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
527 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
528 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
529 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
530 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
531 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
533 call neko_error(
'No device backend configured')
538 type(
coef_t),
intent(in) :: coef
539 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: dtx
540 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: x
541 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: dr
542 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: ds
543 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: dt
544 type(c_ptr) :: dtx_d, x_d, dr_d, ds_d, dt_d
553 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
555 call hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
556 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
559 call cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
560 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
564 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
567 call neko_error(
'No device backend configured')
574 type(
space_t),
intent(in) :: xh
576 integer,
intent(in) :: nelv, gdim
577 real(kind=
rp),
intent(inout) :: du(xh%lxyz, nelv)
578 real(kind=
rp),
intent(inout),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u
579 real(kind=
rp),
intent(inout),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vx
580 real(kind=
rp),
intent(inout),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vy
581 real(kind=
rp),
intent(inout),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vz
582 type(c_ptr) :: du_d, u_d, vx_d, vy_d, vz_d
591 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
593 call hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
594 xh%dx_d, xh%dy_d, xh%dz_d, &
595 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
596 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
597 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
598 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
600 call cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
601 xh%dx_d, xh%dy_d, xh%dz_d, &
602 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
603 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
604 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
605 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
608 xh%dx_d, xh%dy_d, xh%dz_d, &
609 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
610 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
611 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
612 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
614 call neko_error(
'No device backend configured')
621 Xh_GLL, Xh_GL, coef_GLL, coef_GL, GLL_to_GL)
622 type(space_t),
intent(in) :: xh_gl
623 type(space_t),
intent(in) :: xh_gll
624 type(coef_t),
intent(in) :: coef_gll
625 type(coef_t),
intent(in) :: coef_gl
626 type(interpolator_t),
intent(inout) :: gll_to_gl
627 real(kind=rp),
intent(inout) :: &
628 du(xh_gll%lx, xh_gll%ly, xh_gll%lz, coef_gl%msh%nelv)
629 type(c_ptr) :: cr_d, cs_d, ct_d, u_d
630 real(kind=rp) :: ud(xh_gl%lx*xh_gl%lx*xh_gl%lx)
631 type(c_ptr) :: du_d, ud_d
632 integer :: n_gl, n_gll
634 n_gl = coef_gl%msh%nelv * xh_gl%lxyz ! number of points on the GL space; sizes the device_map of ud below
635 n_gll = coef_gl%msh%nelv * xh_gll%lxyz ! number of points on the GLL space; used by the gather-scatter and Binv scaling of du
637 call device_map(ud, ud_d, n_gl)
639 du_d = device_get_ptr(du)
641 associate(xh => xh_gl, nelv => coef_gl%msh%nelv, lx => xh_gl%lx)
644 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
647 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
650 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
652 call neko_error(
'No device backend configured')
655 call gll_to_gl%map(du, ud, nelv, xh_gll)
656 call coef_gll%gs_h%op(du, n_gll, gs_op_add)
657 call device_col2(du_d, coef_gll%Binv_d, n_gll)
661 call device_free(ud_d)
665 subroutine opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh, event)
666 type(field_t),
intent(inout) :: w1
667 type(field_t),
intent(inout) :: w2
668 type(field_t),
intent(inout) :: w3
669 type(field_t),
intent(in) :: u1
670 type(field_t),
intent(in) :: u2
671 type(field_t),
intent(in) :: u3
672 type(field_t),
intent(inout) :: work1
673 type(field_t),
intent(inout) :: work2
674 type(coef_t),
intent(in) :: c_xh
675 type(c_ptr),
optional,
intent(inout) :: event
676 integer :: gdim, n, nelv
683#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
686 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
687 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
688 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
691 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
692 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
693 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
696 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
697 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
698 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
700 if (gdim .eq. 3)
then
703 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
704 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
705 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
708 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
709 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
710 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
713 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
714 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
715 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
717 call device_sub3(w1%x_d, work1%x_d, work2%x_d, n)
719 call device_copy(w1%x_d, work1%x_d, n)
722 if (gdim .eq. 3)
then
725 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
726 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
727 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
729 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
730 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
731 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
734 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
735 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
736 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
738 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
739 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
740 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
743 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
744 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
745 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
747 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
748 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
749 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
751 call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
753 call device_rzero (work1%x_d, n)
756 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
757 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
758 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
761 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
762 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
763 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
766 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
767 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
768 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
770 call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
775 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
776 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
777 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
779 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
780 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
781 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
784 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
785 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
786 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
788 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
789 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
790 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
793 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
794 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
795 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
797 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
798 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
799 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
801 call device_sub3(w3%x_d, work1%x_d, work2%x_d, n)
804 call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%B_d, gdim, n)
806 if (
present(event))
then
808 call c_xh%gs_h%op(w1, gs_op_add, event)
809 call device_event_sync(event)
810 call c_xh%gs_h%op(w2, gs_op_add, event)
811 call device_event_sync(event)
812 call c_xh%gs_h%op(w3, gs_op_add, event)
813 call device_event_sync(event)
817 call c_xh%gs_h%op(w1, gs_op_add)
818 call c_xh%gs_h%op(w2, gs_op_add)
819 call c_xh%gs_h%op(w3, gs_op_add)
823 call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%Binv_d, gdim, n)
826 call neko_error(
'No device backend configured')
831 function opr_device_cfl(dt, u, v, w, Xh, coef, nelv, gdim)
result(cfl)
834 integer :: nelv, gdim
836 real(kind=rp),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u, v, w
838 type(c_ptr) :: u_d, v_d, w_d
840 u_d = device_get_ptr(u)
841 v_d = device_get_ptr(v)
842 w_d = device_get_ptr(w)
845 cfl =
hip_cfl(dt, u_d, v_d, w_d, &
846 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
847 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
848 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
849 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
850 coef%jacinv_d, nelv, xh%lx)
853 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
854 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
855 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
856 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
857 coef%jacinv_d, nelv, xh%lx)
860 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
861 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
862 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
863 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
864 coef%jacinv_d, nelv, xh%lx)
867 call neko_error(
'No device backend configured')
873 integer :: idir, ncyc
874 real(rp),
dimension(coef%Xh%lx*coef%Xh%ly*coef%Xh%lz*coef%msh%nelv) :: &
876 type(c_ptr) :: vx_d, vy_d, vz_d
878 vx_d = device_get_ptr(vx)
879 vy_d = device_get_ptr(vy)
880 vz_d = device_get_ptr(vz)
881 ncyc = coef%cyc_msk(0) - 1
885 coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
886 coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
890 coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
891 coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
895 call neko_error(
'No device backend configured for rotate_cyc')
897 call neko_error(
'No device backend configured for rotate_cyc')
903 integer :: idir, ncyc
904 real(rp),
dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: &
906 type(c_ptr) :: vx_d, vy_d, vz_d
908 vx_d = device_get_ptr(vx)
909 vy_d = device_get_ptr(vy)
910 vz_d = device_get_ptr(vz)
911 ncyc = coef%cyc_msk(0) - 1
915 coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
916 coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
920 coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
921 coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
925 call neko_error(
'No device backend configured for rotate_cyc')
927 call neko_error(
'No device backend configured for rotate_cyc')
933 type(space_t),
intent(inout) :: xh
934 type(coef_t),
intent(inout) :: coef
935 type(c_ptr) :: cr_d, cs_d, ct_d, cx_d, cy_d, cz_d
939 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
940 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
941 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
942 xh%w3_d, coef%msh%nelv, xh%lx)
945 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
946 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
947 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
948 xh%w3_d, coef%msh%nelv, xh%lx)
951 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
952 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
953 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
954 xh%w3_d, coef%msh%nelv, xh%lx)
956 call neko_error(
'No device backend configured')
Return the device pointer for an associated Fortran array.
Map a Fortran array to a device (allocate and associate)
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction $a = b - c$.
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector $a = b$.
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication $a = a \cdot b$.
subroutine, public device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n)
Device abstraction, common interface for various accelerators.
subroutine, public device_event_sync(event)
Synchronize an event.
subroutine, public device_free(x_d)
Deallocate memory on the device.
Routines to interpolate between different spaces.
A simulation component that computes lambda2. The values are stored in the field registry under the na...
integer, parameter, public c_rp
integer, parameter, public rp
Global precision used in computations.
Operators accelerator backends.
subroutine, public opr_device_convect_scalar(du, u_d, cr_d, cs_d, ct_d, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
subroutine, public opr_device_rotate_cyc_r1(vx, vy, vz, idir, coef)
subroutine, public opr_device_cdtp(dtx, x, dr, ds, dt, coef)
subroutine, public opr_device_rotate_cyc_r4(vx, vy, vz, idir, coef)
real(kind=rp) function, public opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh, event)
subroutine, public opr_device_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, xh, coef)
subroutine, public opr_device_dudxyz(du, u, dr, ds, dt, coef)
subroutine, public opr_device_opgrad(ux, uy, uz, u, coef)
subroutine, public opr_device_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
subroutine, public opr_device_lambda2(lambda2, u, v, w, coef)
Defines a function space.
void opencl_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
void cuda_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
real opencl_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
real cuda_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
void opencl_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
void cuda_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
void opencl_convect_scalar(void *du, void *u, void *cr, void *cs, void *ct, void *dx, void *dy, void *dz, int *nel, int *lx)
void cuda_convect_scalar(void *du, void *u, void *cr, void *cs, void *ct, void *dx, void *dy, void *dz, int *nel, int *lx)
void opencl_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
void cuda_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
void opencl_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
void cuda_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
void opencl_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
void cuda_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
void cuda_rotate_cyc(void *vx, void *vy, void *vz, void *x, void *y, void *z, void *cyc_msk, void *R11, void *R12, int *ncyc, int *idir)
void opencl_set_convect_rst(void *cr, void *cs, void *ct, void *cx, void *cy, void *cz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
void cuda_set_convect_rst(void *cr, void *cs, void *ct, void *cx, void *cy, void *cz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Coefficients defined on a given (mesh, $X_h$) tuple. Arrays use indices (i,j,k,e): element e,...
Interpolation between two space::space_t.
The function space for the SEM solution fields.