46 use,
intrinsic :: iso_c_binding
59 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
60 bind(c, name =
'hip_dudxyz')
61 use,
intrinsic :: iso_c_binding
62 type(c_ptr),
value :: du_d, u_d, dr_d, ds_d, dt_d
63 type(c_ptr),
value :: dx_d, dy_d, dz_d, jacinv_d
64 integer(c_int) :: nel, lx
69 subroutine hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
70 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
71 bind(c, name =
'hip_cdtp')
72 use,
intrinsic :: iso_c_binding
73 type(c_ptr),
value :: dtx_d, x_d, dr_d, ds_d, dt_d
74 type(c_ptr),
value :: dxt_d, dyt_d, dzt_d, w3_d
75 integer(c_int) :: nel, lx
81 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
82 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
83 jacinv_d, nel, gdim, lx) &
84 bind(c, name =
'hip_conv1')
85 use,
intrinsic :: iso_c_binding
86 type(c_ptr),
value :: du_d, u_d, vx_d, vy_d, vz_d
87 type(c_ptr),
value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
88 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
89 type(c_ptr),
value :: jacinv_d
90 integer(c_int) :: nel, gdim, lx
96 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'hip_convect_scalar')
97 use,
intrinsic :: iso_c_binding
98 type(c_ptr),
value :: du_d, u_d
99 type(c_ptr),
value :: cr_d, cs_d, ct_d
100 type(c_ptr),
value :: dx_d, dy_d, dz_d
101 integer(c_int) :: nel, lx
108 drdx_d, dsdx_d, dtdx_d, &
109 drdy_d, dsdy_d, dtdy_d, &
110 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
111 bind(c, name =
'hip_opgrad')
112 use,
intrinsic :: iso_c_binding
113 type(c_ptr),
value :: ux_d, uy_d, uz_d, u_d
114 type(c_ptr),
value :: dx_d, dy_d, dz_d
115 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
116 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
117 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
118 type(c_ptr),
value :: w3_d
119 integer(c_int) :: nel, lx
126 drdx_d, dsdx_d, dtdx_d, &
127 drdy_d, dsdy_d, dtdy_d, &
128 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
129 bind(c, name =
'hip_lambda2')
130 use,
intrinsic :: iso_c_binding
131 type(c_ptr),
value :: lambda2_d, u_d, v_d, w_d
132 type(c_ptr),
value :: dx_d, dy_d, dz_d
133 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
134 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
135 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
136 type(c_ptr),
value :: jacinv_d
137 integer(c_int) :: nel, lx
143 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
144 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
146 bind(c, name =
'hip_cfl')
147 use,
intrinsic :: iso_c_binding
149 type(c_ptr),
value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
150 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
151 type(c_ptr),
value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
153 integer(c_int) :: nel, lx
160 cyc_msk_d, R11_d, R12_d, ncyc, idir) &
161 bind(c, name =
'hip_rotate_cyc')
162 use,
intrinsic :: iso_c_binding
163 type(c_ptr),
value :: vx_d, vy_d, vz_d
164 type(c_ptr),
value :: x_d, y_d, z_d
165 type(c_ptr),
value :: cyc_msk_d, R11_d, R12_d
166 integer(c_int) :: ncyc, idir
172 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
173 dtdz_d, w3_d, nel, lx) bind(c, name = 'hip_set_convect_rst')
174 use,
intrinsic :: iso_c_binding
175 type(c_ptr),
value :: cr_d, cs_d, ct_d
176 type(c_ptr),
value :: cx_d, cy_d, cz_d
177 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
178 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
179 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
180 type(c_ptr),
value :: w3_d
181 integer(c_int) :: nel, lx
187 subroutine cuda_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
188 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
189 bind(c, name =
'cuda_dudxyz')
190 use,
intrinsic :: iso_c_binding
191 type(c_ptr),
value :: du_d, u_d, dr_d, ds_d, dt_d
192 type(c_ptr),
value :: dx_d, dy_d, dz_d, jacinv_d
193 integer(c_int) :: nel, lx
198 subroutine cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
199 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
200 bind(c, name =
'cuda_cdtp')
201 use,
intrinsic :: iso_c_binding
202 type(c_ptr),
value :: dtx_d, x_d, dr_d, ds_d, dt_d
203 type(c_ptr),
value :: dxt_d, dyt_d, dzt_d, w3_d
204 integer(c_int) :: nel, lx
209 subroutine cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
210 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
211 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
212 jacinv_d, nel, gdim, lx) &
213 bind(c, name =
'cuda_conv1')
214 use,
intrinsic :: iso_c_binding
215 type(c_ptr),
value :: du_d, u_d, vx_d, vy_d, vz_d
216 type(c_ptr),
value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
217 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
218 type(c_ptr),
value :: jacinv_d
219 integer(c_int) :: nel, gdim, lx
225 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'cuda_convect_scalar')
226 use,
intrinsic :: iso_c_binding
227 type(c_ptr),
value :: du_d, u_d
228 type(c_ptr),
value :: cr_d, cs_d, ct_d
229 type(c_ptr),
value :: dx_d, dy_d, dz_d
230 integer(c_int) :: nel, lx
237 drdx_d, dsdx_d, dtdx_d, &
238 drdy_d, dsdy_d, dtdy_d, &
239 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
240 bind(c, name =
'cuda_opgrad')
241 use,
intrinsic :: iso_c_binding
242 type(c_ptr),
value :: ux_d, uy_d, uz_d, u_d
243 type(c_ptr),
value :: dx_d, dy_d, dz_d
244 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
245 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
246 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
247 type(c_ptr),
value :: w3_d
248 integer(c_int) :: nel, lx
255 drdx_d, dsdx_d, dtdx_d, &
256 drdy_d, dsdy_d, dtdy_d, &
257 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
258 bind(c, name =
'cuda_lambda2')
259 use,
intrinsic :: iso_c_binding
260 type(c_ptr),
value :: lambda2_d, u_d, v_d, w_d
261 type(c_ptr),
value :: dx_d, dy_d, dz_d
262 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
263 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
264 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
265 type(c_ptr),
value :: jacinv_d
266 integer(c_int) :: nel, lx
271 real(c_rp) function cuda_cfl(dt, u_d, v_d, w_d, &
272 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
273 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
275 bind(c, name =
'cuda_cfl')
276 use,
intrinsic :: iso_c_binding
278 type(c_ptr),
value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
279 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
280 type(c_ptr),
value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
282 integer(c_int) :: nel, lx
289 cyc_msk_d, R11_d, R12_d, ncyc, idir) &
290 bind(c, name =
'cuda_rotate_cyc')
291 use,
intrinsic :: iso_c_binding
292 type(c_ptr),
value :: vx_d, vy_d, vz_d
293 type(c_ptr),
value :: x_d, y_d, z_d
294 type(c_ptr),
value :: cyc_msk_d, R11_d, R12_d
295 integer(c_int) :: ncyc, idir
301 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
302 dtdz_d, w3_d, nel, lx) bind(c, name = 'cuda_set_convect_rst')
303 use,
intrinsic :: iso_c_binding
304 type(c_ptr),
value :: cr_d, cs_d, ct_d
305 type(c_ptr),
value :: cx_d, cy_d, cz_d
306 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
307 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
308 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
309 type(c_ptr),
value :: w3_d
310 integer(c_int) :: nel, lx
317 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
318 bind(c, name =
'opencl_dudxyz')
319 use,
intrinsic :: iso_c_binding
320 type(c_ptr),
value :: du_d, u_d, dr_d, ds_d, dt_d
321 type(c_ptr),
value :: dx_d, dy_d, dz_d, jacinv_d
322 integer(c_int) :: nel, lx
327 subroutine opencl_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
328 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
329 bind(c, name =
'opencl_cdtp')
330 use,
intrinsic :: iso_c_binding
331 type(c_ptr),
value :: dtx_d, x_d, dr_d, ds_d, dt_d
332 type(c_ptr),
value :: dxt_d, dyt_d, dzt_d, w3_d
333 integer(c_int) :: nel, lx
339 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
340 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
341 jacinv_d, nel, gdim, lx) &
342 bind(c, name =
'opencl_conv1')
343 use,
intrinsic :: iso_c_binding
344 type(c_ptr),
value :: du_d, u_d, vx_d, vy_d, vz_d
345 type(c_ptr),
value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
346 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
347 type(c_ptr),
value :: jacinv_d
348 integer(c_int) :: nel, gdim, lx
354 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'opencl_convect_scalar')
355 use,
intrinsic :: iso_c_binding
356 type(c_ptr),
value :: du_d, u_d
357 type(c_ptr),
value :: cr_d, cs_d, ct_d
358 type(c_ptr),
value :: dx_d, dy_d, dz_d
359 integer(c_int) :: nel, lx
366 drdx_d, dsdx_d, dtdx_d, &
367 drdy_d, dsdy_d, dtdy_d, &
368 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
369 bind(c, name =
'opencl_opgrad')
370 use,
intrinsic :: iso_c_binding
371 type(c_ptr),
value :: ux_d, uy_d, uz_d, u_d
372 type(c_ptr),
value :: dx_d, dy_d, dz_d
373 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
374 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
375 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
376 type(c_ptr),
value :: w3_d
377 integer(c_int) :: nel, lx
382 real(c_rp) function opencl_cfl(dt, u_d, v_d, w_d, &
383 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
384 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
386 bind(c, name =
'opencl_cfl')
387 use,
intrinsic :: iso_c_binding
389 type(c_ptr),
value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
390 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
391 type(c_ptr),
value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
393 integer(c_int) :: nel, lx
400 drdx_d, dsdx_d, dtdx_d, &
401 drdy_d, dsdy_d, dtdy_d, &
402 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
403 bind(c, name =
'opencl_lambda2')
404 use,
intrinsic :: iso_c_binding
405 type(c_ptr),
value :: lambda2_d, u_d, v_d, w_d
406 type(c_ptr),
value :: dx_d, dy_d, dz_d
407 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
408 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
409 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
410 type(c_ptr),
value :: jacinv_d
411 integer(c_int) :: nel, lx
417 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
418 dtdz_d, w3_d, nel, lx) bind(c, name = 'opencl_set_convect_rst')
419 use,
intrinsic :: iso_c_binding
420 type(c_ptr),
value :: cr_d, cs_d, ct_d
421 type(c_ptr),
value :: cx_d, cy_d, cz_d
422 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
423 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
424 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
425 type(c_ptr),
value :: w3_d
426 integer(c_int) :: nel, lx
435 type(
coef_t),
intent(in),
target :: coef
436 real(kind=
rp),
dimension(coef%Xh%lx, coef%Xh%ly, &
coef%Xh%lz, coef%msh%nelv),
intent(inout) :: du
437 real(kind=
rp),
dimension(coef%Xh%lx, coef%Xh%ly, &
coef%Xh%lz, coef%msh%nelv),
intent(in) :: u, dr, ds, dt
438 type(c_ptr) :: du_d, u_d, dr_d, ds_d, dt_d
447 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
449 call hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
450 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
454 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
458 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
461 call neko_error(
'No device backend configured')
468 type(
coef_t),
intent(in) :: coef
469 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: ux
470 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: uy
471 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: uz
472 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: u
473 type(c_ptr) :: ux_d, uy_d, uz_d, u_d
481 associate(xh => coef%Xh, msh => coef%msh)
484 xh%dx_d, xh%dy_d, xh%dz_d, &
485 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
486 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
487 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
488 xh%w3_d, msh%nelv, xh%lx)
491 xh%dx_d, xh%dy_d, xh%dz_d, &
492 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
493 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
494 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
495 xh%w3_d, msh%nelv, xh%lx)
498 xh%dx_d, xh%dy_d, xh%dz_d, &
499 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
500 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
501 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
502 xh%w3_d, msh%nelv, xh%lx)
504 call neko_error(
'No device backend configured')
516 integer,
intent(in) :: n
517 integer(kind=i8),
intent(in) :: glb_n_points
518 type(c_ptr),
intent(inout) :: x_d
527 type(
coef_t),
intent(in) :: coef
529 type(
field_t),
intent(in) :: u, v, w
532 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
533 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
534 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
535 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
536 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
539 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
540 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
541 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
542 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
543 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
546 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
547 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
548 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
549 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
550 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
552 call neko_error(
'No device backend configured')
557 type(
coef_t),
intent(in) :: coef
558 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: dtx
559 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: x
560 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: dr
561 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: ds
562 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: dt
563 type(c_ptr) :: dtx_d, x_d, dr_d, ds_d, dt_d
572 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
574 call hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
575 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
578 call cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
579 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
583 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
586 call neko_error(
'No device backend configured')
593 type(
space_t),
intent(in) :: xh
595 integer,
intent(in) :: nelv, gdim
596 real(kind=
rp),
intent(inout) :: du(xh%lxyz, nelv)
597 real(kind=
rp),
intent(inout),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u
598 real(kind=
rp),
intent(inout),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vx
599 real(kind=
rp),
intent(inout),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vy
600 real(kind=
rp),
intent(inout),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vz
601 type(c_ptr) :: du_d, u_d, vx_d, vy_d, vz_d
610 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
612 call hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
613 xh%dx_d, xh%dy_d, xh%dz_d, &
614 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
615 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
616 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
617 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
619 call cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
620 xh%dx_d, xh%dy_d, xh%dz_d, &
621 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
622 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
623 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
624 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
627 xh%dx_d, xh%dy_d, xh%dz_d, &
628 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
629 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
630 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
631 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
633 call neko_error(
'No device backend configured')
640 Xh_GLL, Xh_GL, coef_GLL, coef_GL, GLL_to_GL)
641 type(space_t),
intent(in) :: xh_gl
642 type(space_t),
intent(in) :: xh_gll
643 type(coef_t),
intent(in) :: coef_gll
644 type(coef_t),
intent(in) :: coef_gl
645 type(interpolator_t),
intent(inout) :: gll_to_gl
646 real(kind=rp),
intent(inout) :: &
647 du(xh_gll%lx, xh_gll%ly, xh_gll%lz, coef_gl%msh%nelv)
648 type(c_ptr) :: cr_d, cs_d, ct_d, u_d
649 real(kind=rp) :: ud(xh_gl%lx*xh_gl%lx*xh_gl%lx)
650 type(c_ptr) :: du_d, ud_d
651 integer :: n_gl, n_gll
! Point counts for the two spaces: n_gl is built from xh_gl and n_gll from xh_gll.
! n_gl is the size passed to device_map(ud, ud_d, n_gl); n_gll is the size
! passed to gs_h%op and device_col2 on du. Previously both statements
! assigned n_gll, leaving n_gl undefined when device_map was called.
653 n_gl = coef_gl%msh%nelv * xh_gl%lxyz
654 n_gll = coef_gl%msh%nelv * xh_gll%lxyz
656 call device_map(ud, ud_d, n_gl)
658 du_d = device_get_ptr(du)
660 associate(xh => xh_gl, nelv => coef_gl%msh%nelv, lx => xh_gl%lx)
663 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
666 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
669 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
671 call neko_error(
'No device backend configured')
674 call gll_to_gl%map(du, ud, nelv, xh_gll)
675 call coef_gll%gs_h%op(du, n_gll, gs_op_add)
676 call device_col2(du_d, coef_gll%Binv_d, n_gll)
680 call device_free(ud_d)
684 subroutine opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh, event)
685 type(field_t),
intent(inout) :: w1
686 type(field_t),
intent(inout) :: w2
687 type(field_t),
intent(inout) :: w3
688 type(field_t),
intent(in) :: u1
689 type(field_t),
intent(in) :: u2
690 type(field_t),
intent(in) :: u3
691 type(field_t),
intent(inout) :: work1
692 type(field_t),
intent(inout) :: work2
693 type(coef_t),
intent(in) :: c_xh
694 type(c_ptr),
optional,
intent(inout) :: event
695 integer :: gdim, n, nelv
702#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
705 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
706 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
707 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
710 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
711 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
712 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
715 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
716 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
717 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
719 if (gdim .eq. 3)
then
722 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
723 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
724 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
727 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
728 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
729 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
732 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
733 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
734 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
736 call device_sub3(w1%x_d, work1%x_d, work2%x_d, n)
738 call device_copy(w1%x_d, work1%x_d, n)
741 if (gdim .eq. 3)
then
744 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
745 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
746 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
748 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
749 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
750 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
753 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
754 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
755 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
757 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
758 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
759 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
762 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
763 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
764 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
766 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
767 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
768 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
770 call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
772 call device_rzero (work1%x_d, n)
775 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
776 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
777 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
780 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
781 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
782 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
785 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
786 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
787 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
789 call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
794 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
795 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
796 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
798 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
799 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
800 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
803 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
804 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
805 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
807 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
808 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
809 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
812 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
813 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
814 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
816 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
817 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
818 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
820 call device_sub3(w3%x_d, work1%x_d, work2%x_d, n)
823 call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%B_d, gdim, n)
825 if (
present(event))
then
827 call c_xh%gs_h%op(w1, gs_op_add, event)
828 call device_event_sync(event)
829 call c_xh%gs_h%op(w2, gs_op_add, event)
830 call device_event_sync(event)
831 call c_xh%gs_h%op(w3, gs_op_add, event)
832 call device_event_sync(event)
836 call c_xh%gs_h%op(w1, gs_op_add)
837 call c_xh%gs_h%op(w2, gs_op_add)
838 call c_xh%gs_h%op(w3, gs_op_add)
842 call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%Binv_d, gdim, n)
845 call neko_error(
'No device backend configured')
850 function opr_device_cfl(dt, u, v, w, Xh, coef, nelv, gdim)
result(cfl)
853 integer :: nelv, gdim
855 real(kind=rp),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u, v, w
857 type(c_ptr) :: u_d, v_d, w_d
859 u_d = device_get_ptr(u)
860 v_d = device_get_ptr(v)
861 w_d = device_get_ptr(w)
864 cfl =
hip_cfl(dt, u_d, v_d, w_d, &
865 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
866 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
867 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
868 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
869 coef%jacinv_d, nelv, xh%lx)
872 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
873 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
874 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
875 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
876 coef%jacinv_d, nelv, xh%lx)
879 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
880 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
881 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
882 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
883 coef%jacinv_d, nelv, xh%lx)
886 call neko_error(
'No device backend configured')
892 integer :: idir, ncyc
893 real(rp),
dimension(coef%Xh%lx*coef%Xh%ly*coef%Xh%lz*coef%msh%nelv) :: &
895 type(c_ptr) :: vx_d, vy_d, vz_d
897 vx_d = device_get_ptr(vx)
898 vy_d = device_get_ptr(vy)
899 vz_d = device_get_ptr(vz)
900 ncyc = coef%cyc_msk(0) - 1
902 if (ncyc .le. 0)
return
906 coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
907 coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
911 coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
912 coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
916 call neko_error(
'No device backend configured for rotate_cyc')
918 call neko_error(
'No device backend configured for rotate_cyc')
924 integer :: idir, ncyc
925 real(rp),
dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, coef%msh%nelv) :: &
927 type(c_ptr) :: vx_d, vy_d, vz_d
929 vx_d = device_get_ptr(vx)
930 vy_d = device_get_ptr(vy)
931 vz_d = device_get_ptr(vz)
932 ncyc = coef%cyc_msk(0) - 1
934 if (ncyc .le. 0)
return
938 coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
939 coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
943 coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
944 coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
948 call neko_error(
'No device backend configured for rotate_cyc')
950 call neko_error(
'No device backend configured for rotate_cyc')
956 type(space_t),
intent(inout) :: xh
957 type(coef_t),
intent(inout) :: coef
958 type(c_ptr) :: cr_d, cs_d, ct_d, cx_d, cy_d, cz_d
962 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
963 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
964 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
965 xh%w3_d, coef%msh%nelv, xh%lx)
968 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
969 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
970 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
971 xh%w3_d, coef%msh%nelv, xh%lx)
974 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
975 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
976 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
977 xh%w3_d, coef%msh%nelv, xh%lx)
979 call neko_error(
'No device backend configured')
Return the device pointer for an associated Fortran array.
Map a Fortran array to a device (allocate and associate)
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction $a = b - c$.
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector $a = b$.
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication $a = a \cdot b$.
subroutine, public device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n)
Device abstraction, common interface for various accelerators.
subroutine, public device_event_sync(event)
Synchronize an event.
subroutine, public device_free(x_d)
Deallocate memory on the device.
Routines to interpolate between different spaces.
A simulation component that computes lambda2. The values are stored in the field registry under the na...
integer, parameter, public i8
integer, parameter, public c_rp
integer, parameter, public rp
Global precision used in computations.
Operators accelerator backends.
subroutine, public opr_device_convect_scalar(du, u_d, cr_d, cs_d, ct_d, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
subroutine, public opr_device_rotate_cyc_r1(vx, vy, vz, idir, coef)
subroutine, public opr_device_cdtp(dtx, x, dr, ds, dt, coef)
subroutine, public opr_device_rotate_cyc_r4(vx, vy, vz, idir, coef)
real(kind=rp) function, public opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public device_ortho(x_d, glb_n_points, n)
Orthogonalize with regard to vector (1,1,1,1,1,1...,1)^T.
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh, event)
subroutine, public opr_device_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, xh, coef)
subroutine, public opr_device_dudxyz(du, u, dr, ds, dt, coef)
subroutine, public opr_device_opgrad(ux, uy, uz, u, coef)
subroutine, public opr_device_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
subroutine, public opr_device_lambda2(lambda2, u, v, w, coef)
Defines a function space.
void opencl_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
void cuda_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
real opencl_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
real cuda_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
void opencl_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
void cuda_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
void opencl_convect_scalar(void *du, void *u, void *cr, void *cs, void *ct, void *dx, void *dy, void *dz, int *nel, int *lx)
void cuda_convect_scalar(void *du, void *u, void *cr, void *cs, void *ct, void *dx, void *dy, void *dz, int *nel, int *lx)
void opencl_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
void cuda_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
void opencl_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
void cuda_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
void opencl_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
void cuda_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
void cuda_rotate_cyc(void *vx, void *vy, void *vz, void *x, void *y, void *z, void *cyc_msk, void *R11, void *R12, int *ncyc, int *idir)
void opencl_set_convect_rst(void *cr, void *cs, void *ct, void *cx, void *cy, void *cz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
void cuda_set_convect_rst(void *cr, void *cs, void *ct, void *cx, void *cy, void *cz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Coefficients defined on a given (mesh, $X_h$) tuple. Arrays use indices (i,j,k,e): element e,...
Interpolation between two space::space_t.
The function space for the SEM solution fields.