45 use,
intrinsic :: iso_c_binding
56 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
57 bind(c, name =
'hip_dudxyz')
58 use,
intrinsic :: iso_c_binding
59 type(c_ptr),
value :: du_d, u_d, dr_d, ds_d, dt_d
60 type(c_ptr),
value :: dx_d, dy_d, dz_d, jacinv_d
61 integer(c_int) :: nel, lx
66 subroutine hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
67 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
68 bind(c, name =
'hip_cdtp')
69 use,
intrinsic :: iso_c_binding
70 type(c_ptr),
value :: dtx_d, x_d, dr_d, ds_d, dt_d
71 type(c_ptr),
value :: dxt_d, dyt_d, dzt_d, w3_d
72 integer(c_int) :: nel, lx
78 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
79 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
80 jacinv_d, nel, gdim, lx) &
81 bind(c, name =
'hip_conv1')
82 use,
intrinsic :: iso_c_binding
83 type(c_ptr),
value :: du_d, u_d, vx_d, vy_d, vz_d
84 type(c_ptr),
value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
85 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
86 type(c_ptr),
value :: jacinv_d
87 integer(c_int) :: nel, gdim, lx
93 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'hip_convect_scalar')
94 use,
intrinsic :: iso_c_binding
95 type(c_ptr),
value :: du_d, u_d
96 type(c_ptr),
value :: cr_d, cs_d, ct_d
97 type(c_ptr),
value :: dx_d, dy_d, dz_d
98 integer(c_int) :: nel, lx
105 drdx_d, dsdx_d, dtdx_d, &
106 drdy_d, dsdy_d, dtdy_d, &
107 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
108 bind(c, name =
'hip_opgrad')
109 use,
intrinsic :: iso_c_binding
110 type(c_ptr),
value :: ux_d, uy_d, uz_d, u_d
111 type(c_ptr),
value :: dx_d, dy_d, dz_d
112 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
113 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
114 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
115 type(c_ptr),
value :: w3_d
116 integer(c_int) :: nel, lx
123 drdx_d, dsdx_d, dtdx_d, &
124 drdy_d, dsdy_d, dtdy_d, &
125 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
126 bind(c, name =
'hip_lambda2')
127 use,
intrinsic :: iso_c_binding
128 type(c_ptr),
value :: lambda2_d, u_d, v_d, w_d
129 type(c_ptr),
value :: dx_d, dy_d, dz_d
130 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
131 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
132 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
133 type(c_ptr),
value :: jacinv_d
134 integer(c_int) :: nel, lx
140 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
141 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
143 bind(c, name =
'hip_cfl')
144 use,
intrinsic :: iso_c_binding
146 type(c_ptr),
value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
147 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
148 type(c_ptr),
value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
150 integer(c_int) :: nel, lx
156 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
157 dtdz_d, w3_d, nel, lx) bind(c, name = 'hip_set_convect_rst')
158 use,
intrinsic :: iso_c_binding
159 type(c_ptr),
value :: cr_d, cs_d, ct_d
160 type(c_ptr),
value :: cx_d, cy_d, cz_d
161 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
162 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
163 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
164 type(c_ptr),
value :: w3_d
165 integer(c_int) :: nel, lx
171 subroutine cuda_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
172 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
173 bind(c, name =
'cuda_dudxyz')
174 use,
intrinsic :: iso_c_binding
175 type(c_ptr),
value :: du_d, u_d, dr_d, ds_d, dt_d
176 type(c_ptr),
value :: dx_d, dy_d, dz_d, jacinv_d
177 integer(c_int) :: nel, lx
182 subroutine cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
183 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
184 bind(c, name =
'cuda_cdtp')
185 use,
intrinsic :: iso_c_binding
186 type(c_ptr),
value :: dtx_d, x_d, dr_d, ds_d, dt_d
187 type(c_ptr),
value :: dxt_d, dyt_d, dzt_d, w3_d
188 integer(c_int) :: nel, lx
193 subroutine cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
194 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
195 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
196 jacinv_d, nel, gdim, lx) &
197 bind(c, name =
'cuda_conv1')
198 use,
intrinsic :: iso_c_binding
199 type(c_ptr),
value :: du_d, u_d, vx_d, vy_d, vz_d
200 type(c_ptr),
value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
201 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
202 type(c_ptr),
value :: jacinv_d
203 integer(c_int) :: nel, gdim, lx
209 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'cuda_convect_scalar')
210 use,
intrinsic :: iso_c_binding
211 type(c_ptr),
value :: du_d, u_d
212 type(c_ptr),
value :: cr_d, cs_d, ct_d
213 type(c_ptr),
value :: dx_d, dy_d, dz_d
214 integer(c_int) :: nel, lx
221 drdx_d, dsdx_d, dtdx_d, &
222 drdy_d, dsdy_d, dtdy_d, &
223 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
224 bind(c, name =
'cuda_opgrad')
225 use,
intrinsic :: iso_c_binding
226 type(c_ptr),
value :: ux_d, uy_d, uz_d, u_d
227 type(c_ptr),
value :: dx_d, dy_d, dz_d
228 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
229 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
230 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
231 type(c_ptr),
value :: w3_d
232 integer(c_int) :: nel, lx
239 drdx_d, dsdx_d, dtdx_d, &
240 drdy_d, dsdy_d, dtdy_d, &
241 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
242 bind(c, name =
'cuda_lambda2')
243 use,
intrinsic :: iso_c_binding
244 type(c_ptr),
value :: lambda2_d, u_d, v_d, w_d
245 type(c_ptr),
value :: dx_d, dy_d, dz_d
246 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
247 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
248 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
249 type(c_ptr),
value :: jacinv_d
250 integer(c_int) :: nel, lx
255 real(c_rp) function cuda_cfl(dt, u_d, v_d, w_d, &
256 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
257 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
259 bind(c, name =
'cuda_cfl')
260 use,
intrinsic :: iso_c_binding
262 type(c_ptr),
value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
263 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
264 type(c_ptr),
value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
266 integer(c_int) :: nel, lx
272 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
273 dtdz_d, w3_d, nel, lx) bind(c, name = 'cuda_set_convect_rst')
274 use,
intrinsic :: iso_c_binding
275 type(c_ptr),
value :: cr_d, cs_d, ct_d
276 type(c_ptr),
value :: cx_d, cy_d, cz_d
277 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
278 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
279 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
280 type(c_ptr),
value :: w3_d
281 integer(c_int) :: nel, lx
288 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
289 bind(c, name =
'opencl_dudxyz')
290 use,
intrinsic :: iso_c_binding
291 type(c_ptr),
value :: du_d, u_d, dr_d, ds_d, dt_d
292 type(c_ptr),
value :: dx_d, dy_d, dz_d, jacinv_d
293 integer(c_int) :: nel, lx
298 subroutine opencl_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
299 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
300 bind(c, name =
'opencl_cdtp')
301 use,
intrinsic :: iso_c_binding
302 type(c_ptr),
value :: dtx_d, x_d, dr_d, ds_d, dt_d
303 type(c_ptr),
value :: dxt_d, dyt_d, dzt_d, w3_d
304 integer(c_int) :: nel, lx
310 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
311 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
312 jacinv_d, nel, gdim, lx) &
313 bind(c, name =
'opencl_conv1')
314 use,
intrinsic :: iso_c_binding
315 type(c_ptr),
value :: du_d, u_d, vx_d, vy_d, vz_d
316 type(c_ptr),
value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
317 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
318 type(c_ptr),
value :: jacinv_d
319 integer(c_int) :: nel, gdim, lx
325 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'opencl_convect_scalar')
326 use,
intrinsic :: iso_c_binding
327 type(c_ptr),
value :: du_d, u_d
328 type(c_ptr),
value :: cr_d, cs_d, ct_d
329 type(c_ptr),
value :: dx_d, dy_d, dz_d
330 integer(c_int) :: nel, lx
337 drdx_d, dsdx_d, dtdx_d, &
338 drdy_d, dsdy_d, dtdy_d, &
339 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
340 bind(c, name =
'opencl_opgrad')
341 use,
intrinsic :: iso_c_binding
342 type(c_ptr),
value :: ux_d, uy_d, uz_d, u_d
343 type(c_ptr),
value :: dx_d, dy_d, dz_d
344 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
345 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
346 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
347 type(c_ptr),
value :: w3_d
348 integer(c_int) :: nel, lx
353 real(c_rp) function opencl_cfl(dt, u_d, v_d, w_d, &
354 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
355 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
357 bind(c, name =
'opencl_cfl')
358 use,
intrinsic :: iso_c_binding
360 type(c_ptr),
value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
361 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
362 type(c_ptr),
value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
364 integer(c_int) :: nel, lx
371 drdx_d, dsdx_d, dtdx_d, &
372 drdy_d, dsdy_d, dtdy_d, &
373 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
374 bind(c, name =
'opencl_lambda2')
375 use,
intrinsic :: iso_c_binding
376 type(c_ptr),
value :: lambda2_d, u_d, v_d, w_d
377 type(c_ptr),
value :: dx_d, dy_d, dz_d
378 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
379 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
380 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
381 type(c_ptr),
value :: jacinv_d
382 integer(c_int) :: nel, lx
388 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
389 dtdz_d, w3_d, nel, lx) bind(c, name = 'opencl_set_convect_rst')
390 use,
intrinsic :: iso_c_binding
391 type(c_ptr),
value :: cr_d, cs_d, ct_d
392 type(c_ptr),
value :: cx_d, cy_d, cz_d
393 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
394 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
395 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
396 type(c_ptr),
value :: w3_d
397 integer(c_int) :: nel, lx
406 type(
coef_t),
intent(in),
target :: coef
407 real(kind=
rp),
dimension(coef%Xh%lx, coef%Xh%ly, &
coef%Xh%lz, coef%msh%nelv),
intent(inout) :: du
408 real(kind=
rp),
dimension(coef%Xh%lx, coef%Xh%ly, &
coef%Xh%lz, coef%msh%nelv),
intent(in) :: u, dr, ds, dt
409 type(c_ptr) :: du_d, u_d, dr_d, ds_d, dt_d
418 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
420 call hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
421 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
425 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
429 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
432 call neko_error(
'No device backend configured')
439 type(
coef_t),
intent(in) :: coef
440 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: ux
441 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: uy
442 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: uz
443 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: u
444 type(c_ptr) :: ux_d, uy_d, uz_d, u_d
452 associate(xh => coef%Xh, msh => coef%msh)
455 xh%dx_d, xh%dy_d, xh%dz_d, &
456 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
457 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
458 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
459 xh%w3_d, msh%nelv, xh%lx)
462 xh%dx_d, xh%dy_d, xh%dz_d, &
463 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
464 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
465 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
466 xh%w3_d, msh%nelv, xh%lx)
469 xh%dx_d, xh%dy_d, xh%dz_d, &
470 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
471 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
472 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
473 xh%w3_d, msh%nelv, xh%lx)
475 call neko_error(
'No device backend configured')
481 type(
coef_t),
intent(in) :: coef
483 type(
field_t),
intent(in) :: u, v, w
486 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
487 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
488 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
489 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
490 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
493 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
494 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
495 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
496 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
497 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
500 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
501 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
502 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
503 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
504 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
506 call neko_error(
'No device backend configured')
511 type(
coef_t),
intent(in) :: coef
512 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: dtx
513 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: x
514 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: dr
515 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: ds
516 real(kind=
rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: dt
517 type(c_ptr) :: dtx_d, x_d, dr_d, ds_d, dt_d
526 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
528 call hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
529 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
532 call cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
533 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
537 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
540 call neko_error(
'No device backend configured')
547 type(
space_t),
intent(in) :: xh
549 integer,
intent(in) :: nelv, gdim
550 real(kind=
rp),
intent(inout) :: du(xh%lxyz, nelv)
551 real(kind=
rp),
intent(inout),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u
552 real(kind=
rp),
intent(inout),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vx
553 real(kind=
rp),
intent(inout),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vy
554 real(kind=
rp),
intent(inout),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vz
555 type(c_ptr) :: du_d, u_d, vx_d, vy_d, vz_d
564 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
566 call hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
567 xh%dx_d, xh%dy_d, xh%dz_d, &
568 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
569 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
570 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
571 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
573 call cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
574 xh%dx_d, xh%dy_d, xh%dz_d, &
575 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
576 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
577 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
578 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
581 xh%dx_d, xh%dy_d, xh%dz_d, &
582 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
583 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
584 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
585 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
587 call neko_error(
'No device backend configured')
594 Xh_GLL, Xh_GL, coef_GLL, coef_GL, GLL_to_GL)
595 type(space_t),
intent(in) :: xh_gl
596 type(space_t),
intent(in) :: xh_gll
597 type(coef_t),
intent(in) :: coef_gll
598 type(coef_t),
intent(in) :: coef_gl
599 type(interpolator_t),
intent(inout) :: gll_to_gl
600 real(kind=rp),
intent(inout) :: &
601 du(xh_gll%lx, xh_gll%ly, xh_gll%lz, coef_gl%msh%nelv)
602 type(c_ptr) :: cr_d, cs_d, ct_d, u_d
603 real(kind=rp) :: ud(xh_gl%lx*xh_gl%lx*xh_gl%lx)
604 type(c_ptr) :: du_d, ud_d
605 integer :: n_gl, n_gll
! n_gl sizes the GL-space buffer passed to device_map below; n_gll is the
! GLL-space size used for the gather-scatter and the Binv scaling of du.
607 n_gl = coef_gl%msh%nelv * xh_gl%lxyz
608 n_gll = coef_gl%msh%nelv * xh_gll%lxyz
610 call device_map(ud, ud_d, n_gl)
612 du_d = device_get_ptr(du)
614 associate(xh => xh_gl, nelv => coef_gl%msh%nelv, lx => xh_gl%lx)
617 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
620 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
623 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
625 call neko_error(
'No device backend configured')
628 call gll_to_gl%map(du, ud, nelv, xh_gll)
629 call coef_gll%gs_h%op(du, n_gll, gs_op_add)
630 call device_col2(du_d, coef_gll%Binv_d, n_gll)
634 call device_free(ud_d)
638 subroutine opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh, event)
639 type(field_t),
intent(inout) :: w1
640 type(field_t),
intent(inout) :: w2
641 type(field_t),
intent(inout) :: w3
642 type(field_t),
intent(in) :: u1
643 type(field_t),
intent(in) :: u2
644 type(field_t),
intent(in) :: u3
645 type(field_t),
intent(inout) :: work1
646 type(field_t),
intent(inout) :: work2
647 type(coef_t),
intent(in) :: c_xh
648 type(c_ptr),
optional,
intent(inout) :: event
649 integer :: gdim, n, nelv
656#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
659 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
660 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
661 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
664 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
665 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
666 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
669 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
670 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
671 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
673 if (gdim .eq. 3)
then
676 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
677 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
678 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
681 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
682 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
683 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
686 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
687 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
688 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
690 call device_sub3(w1%x_d, work1%x_d, work2%x_d, n)
692 call device_copy(w1%x_d, work1%x_d, n)
695 if (gdim .eq. 3)
then
698 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
699 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
700 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
702 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
703 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
704 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
707 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
708 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
709 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
711 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
712 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
713 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
716 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
717 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
718 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
720 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
721 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
722 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
724 call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
726 call device_rzero (work1%x_d, n)
729 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
730 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
731 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
734 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
735 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
736 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
739 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
740 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
741 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
743 call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
748 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
749 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
750 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
752 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
753 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
754 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
757 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
758 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
759 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
761 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
762 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
763 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
766 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
767 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
768 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
770 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
771 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
772 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
774 call device_sub3(w3%x_d, work1%x_d, work2%x_d, n)
777 call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%B_d, gdim, n)
779 if (
present(event))
then
780 call c_xh%gs_h%op(w1, gs_op_add, event)
781 call device_event_sync(event)
782 call c_xh%gs_h%op(w2, gs_op_add, event)
783 call device_event_sync(event)
784 call c_xh%gs_h%op(w3, gs_op_add, event)
785 call device_event_sync(event)
787 call c_xh%gs_h%op(w1, gs_op_add)
788 call c_xh%gs_h%op(w2, gs_op_add)
789 call c_xh%gs_h%op(w3, gs_op_add)
792 call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%Binv_d, gdim, n)
795 call neko_error(
'No device backend configured')
800 function opr_device_cfl(dt, u, v, w, Xh, coef, nelv, gdim)
result(cfl)
803 integer :: nelv, gdim
805 real(kind=rp),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u, v, w
807 type(c_ptr) :: u_d, v_d, w_d
809 u_d = device_get_ptr(u)
810 v_d = device_get_ptr(v)
811 w_d = device_get_ptr(w)
814 cfl =
hip_cfl(dt, u_d, v_d, w_d, &
815 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
816 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
817 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
818 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
819 coef%jacinv_d, nelv, xh%lx)
822 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
823 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
824 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
825 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
826 coef%jacinv_d, nelv, xh%lx)
829 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
830 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
831 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
832 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
833 coef%jacinv_d, nelv, xh%lx)
836 call neko_error(
'No device backend configured')
842 type(space_t),
intent(inout) :: xh
843 type(coef_t),
intent(inout) :: coef
844 type(c_ptr) :: cr_d, cs_d, ct_d, cx_d, cy_d, cz_d
848 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
849 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
850 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
851 xh%w3_d, coef%msh%nelv, xh%lx)
854 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
855 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
856 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
857 xh%w3_d, coef%msh%nelv, xh%lx)
860 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
861 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
862 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
863 xh%w3_d, coef%msh%nelv, xh%lx)
865 call neko_error(
'No device backend configured')
Return the device pointer for an associated Fortran array.
Map a Fortran array to a device (allocate and associate)
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction $a = b - c$.
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector $a = b$.
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication $a = a \cdot b$.
subroutine, public device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n)
Device abstraction, common interface for various accelerators.
subroutine, public device_event_sync(event)
Synchronize an event.
subroutine, public device_free(x_d)
Deallocate memory on the device.
Routines to interpolate between different spaces.
A simulation component that computes lambda2. The values are stored in the field registry under the na...
integer, parameter, public c_rp
integer, parameter, public rp
Global precision used in computations.
Operators accelerator backends.
subroutine, public opr_device_convect_scalar(du, u_d, cr_d, cs_d, ct_d, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
subroutine, public opr_device_cdtp(dtx, x, dr, ds, dt, coef)
real(kind=rp) function, public opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh, event)
subroutine, public opr_device_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, xh, coef)
subroutine, public opr_device_dudxyz(du, u, dr, ds, dt, coef)
subroutine, public opr_device_opgrad(ux, uy, uz, u, coef)
subroutine, public opr_device_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
subroutine, public opr_device_lambda2(lambda2, u, v, w, coef)
Defines a function space.
void opencl_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
void cuda_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
real opencl_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
real cuda_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
void opencl_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
void cuda_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
void opencl_convect_scalar(void *du, void *u, void *cr, void *cs, void *ct, void *dx, void *dy, void *dz, int *nel, int *lx)
void cuda_convect_scalar(void *du, void *u, void *cr, void *cs, void *ct, void *dx, void *dy, void *dz, int *nel, int *lx)
void opencl_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
void cuda_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
void opencl_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
void cuda_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
void opencl_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
void cuda_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
void opencl_set_convect_rst(void *cr, void *cs, void *ct, void *cx, void *cy, void *cz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
void cuda_set_convect_rst(void *cr, void *cs, void *ct, void *cx, void *cy, void *cz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Coefficients defined on a given (mesh, $X_h$) tuple. Arrays use indices (i,j,k,e): element e,...
Interpolation between two space::space_t.
The function space for the SEM solution fields.