44 use,
intrinsic :: iso_c_binding
54 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
55 bind(c, name =
'hip_dudxyz')
56 use,
intrinsic :: iso_c_binding
57 type(c_ptr),
value :: du_d, u_d, dr_d, ds_d, dt_d
58 type(c_ptr),
value :: dx_d, dy_d, dz_d, jacinv_d
59 integer(c_int) :: nel, lx
64 subroutine hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
65 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
66 bind(c, name =
'hip_cdtp')
67 use,
intrinsic :: iso_c_binding
68 type(c_ptr),
value :: dtx_d, x_d, dr_d, ds_d, dt_d
69 type(c_ptr),
value :: dxt_d, dyt_d, dzt_d, w3_d
70 integer(c_int) :: nel, lx
76 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
77 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
78 jacinv_d, nel, gdim, lx) &
79 bind(c, name =
'hip_conv1')
80 use,
intrinsic :: iso_c_binding
81 type(c_ptr),
value :: du_d, u_d, vx_d, vy_d, vz_d
82 type(c_ptr),
value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
83 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
84 type(c_ptr),
value :: jacinv_d
85 integer(c_int) :: nel, gdim, lx
92 drdx_d, dsdx_d, dtdx_d, &
93 drdy_d, dsdy_d, dtdy_d, &
94 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
95 bind(c, name =
'hip_opgrad')
96 use,
intrinsic :: iso_c_binding
97 type(c_ptr),
value :: ux_d, uy_d, uz_d, u_d
98 type(c_ptr),
value :: dx_d, dy_d, dz_d
99 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
100 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
101 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
102 type(c_ptr),
value :: w3_d
103 integer(c_int) :: nel, lx
110 drdx_d, dsdx_d, dtdx_d, &
111 drdy_d, dsdy_d, dtdy_d, &
112 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
113 bind(c, name =
'hip_lambda2')
114 use,
intrinsic :: iso_c_binding
115 type(c_ptr),
value :: lambda2_d, u_d, v_d, w_d
116 type(c_ptr),
value :: dx_d, dy_d, dz_d
117 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
118 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
119 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
120 type(c_ptr),
value :: jacinv_d
121 integer(c_int) :: nel, lx
127 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
128 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
130 bind(c, name =
'hip_cfl')
131 use,
intrinsic :: iso_c_binding
133 type(c_ptr),
value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
134 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
135 type(c_ptr),
value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
137 integer(c_int) :: nel, lx
146 subroutine cuda_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
147 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
148 bind(c, name =
'cuda_dudxyz')
149 use,
intrinsic :: iso_c_binding
150 type(c_ptr),
value :: du_d, u_d, dr_d, ds_d, dt_d
151 type(c_ptr),
value :: dx_d, dy_d, dz_d, jacinv_d
152 integer(c_int) :: nel, lx
157 subroutine cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
158 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
159 bind(c, name =
'cuda_cdtp')
160 use,
intrinsic :: iso_c_binding
161 type(c_ptr),
value :: dtx_d, x_d, dr_d, ds_d, dt_d
162 type(c_ptr),
value :: dxt_d, dyt_d, dzt_d, w3_d
163 integer(c_int) :: nel, lx
168 subroutine cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
169 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
170 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
171 jacinv_d, nel, gdim, lx) &
172 bind(c, name =
'cuda_conv1')
173 use,
intrinsic :: iso_c_binding
174 type(c_ptr),
value :: du_d, u_d, vx_d, vy_d, vz_d
175 type(c_ptr),
value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
176 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
177 type(c_ptr),
value :: jacinv_d
178 integer(c_int) :: nel, gdim, lx
185 drdx_d, dsdx_d, dtdx_d, &
186 drdy_d, dsdy_d, dtdy_d, &
187 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
188 bind(c, name =
'cuda_opgrad')
189 use,
intrinsic :: iso_c_binding
190 type(c_ptr),
value :: ux_d, uy_d, uz_d, u_d
191 type(c_ptr),
value :: dx_d, dy_d, dz_d
192 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
193 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
194 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
195 type(c_ptr),
value :: w3_d
196 integer(c_int) :: nel, lx
203 drdx_d, dsdx_d, dtdx_d, &
204 drdy_d, dsdy_d, dtdy_d, &
205 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
206 bind(c, name =
'cuda_lambda2')
207 use,
intrinsic :: iso_c_binding
208 type(c_ptr),
value :: lambda2_d, u_d, v_d, w_d
209 type(c_ptr),
value :: dx_d, dy_d, dz_d
210 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
211 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
212 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
213 type(c_ptr),
value :: jacinv_d
214 integer(c_int) :: nel, lx
219 real(c_rp) function
cuda_cfl(dt, u_d, v_d, w_d, &
220 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
221 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
223 bind(c, name =
'cuda_cfl')
224 use,
intrinsic :: iso_c_binding
226 type(c_ptr),
value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
227 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
228 type(c_ptr),
value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
230 integer(c_int) :: nel, lx
240 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
241 bind(c, name =
'opencl_dudxyz')
242 use,
intrinsic :: iso_c_binding
243 type(c_ptr),
value :: du_d, u_d, dr_d, ds_d, dt_d
244 type(c_ptr),
value :: dx_d, dy_d, dz_d, jacinv_d
245 integer(c_int) :: nel, lx
250 subroutine opencl_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
251 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
252 bind(c, name =
'opencl_cdtp')
253 use,
intrinsic :: iso_c_binding
254 type(c_ptr),
value :: dtx_d, x_d, dr_d, ds_d, dt_d
255 type(c_ptr),
value :: dxt_d, dyt_d, dzt_d, w3_d
256 integer(c_int) :: nel, lx
262 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
263 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
264 jacinv_d, nel, gdim, lx) &
265 bind(c, name =
'opencl_conv1')
266 use,
intrinsic :: iso_c_binding
267 type(c_ptr),
value :: du_d, u_d, vx_d, vy_d, vz_d
268 type(c_ptr),
value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
269 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
270 type(c_ptr),
value :: jacinv_d
271 integer(c_int) :: nel, gdim, lx
278 drdx_d, dsdx_d, dtdx_d, &
279 drdy_d, dsdy_d, dtdy_d, &
280 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
281 bind(c, name =
'opencl_opgrad')
282 use,
intrinsic :: iso_c_binding
283 type(c_ptr),
value :: ux_d, uy_d, uz_d, u_d
284 type(c_ptr),
value :: dx_d, dy_d, dz_d
285 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
286 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
287 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
288 type(c_ptr),
value :: w3_d
289 integer(c_int) :: nel, lx
294 real(c_rp) function
opencl_cfl(dt, u_d, v_d, w_d, &
295 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
296 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
298 bind(c, name =
'opencl_cfl')
299 use,
intrinsic :: iso_c_binding
301 type(c_ptr),
value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
302 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
303 type(c_ptr),
value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
305 integer(c_int) :: nel, lx
312 drdx_d, dsdx_d, dtdx_d, &
313 drdy_d, dsdy_d, dtdy_d, &
314 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
315 bind(c, name =
'opencl_lambda2')
316 use,
intrinsic :: iso_c_binding
317 type(c_ptr),
value :: lambda2_d, u_d, v_d, w_d
318 type(c_ptr),
value :: dx_d, dy_d, dz_d
319 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
320 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
321 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
322 type(c_ptr),
value :: jacinv_d
323 integer(c_int) :: nel, lx
333 type(coef_t),
intent(in),
target :: coef
334 real(kind=rp),
dimension(coef%Xh%lx, coef%Xh%ly, &
coef%Xh%lz, coef%msh%nelv),
intent(inout) :: du
335 real(kind=rp),
dimension(coef%Xh%lx, coef%Xh%ly, &
coef%Xh%lz, coef%msh%nelv),
intent(in) :: u, dr, ds, dt
336 type(c_ptr) :: du_d, u_d, dr_d, ds_d, dt_d
338 du_d = device_get_ptr(du)
339 u_d = device_get_ptr(u)
341 dr_d = device_get_ptr(dr)
342 ds_d = device_get_ptr(ds)
343 dt_d = device_get_ptr(dt)
345 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
347 call hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
348 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
352 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
356 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
359 call neko_error(
'No device backend configured')
366 type(coef_t),
intent(in) :: coef
367 real(kind=rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: ux
368 real(kind=rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: uy
369 real(kind=rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: uz
370 real(kind=rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: u
371 type(c_ptr) :: ux_d, uy_d, uz_d, u_d
373 ux_d = device_get_ptr(ux)
374 uy_d = device_get_ptr(uy)
375 uz_d = device_get_ptr(uz)
377 u_d = device_get_ptr(u)
379 associate(xh => coef%Xh, msh => coef%msh)
382 xh%dx_d, xh%dy_d, xh%dz_d, &
383 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
384 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
385 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
386 xh%w3_d, msh%nelv, xh%lx)
389 xh%dx_d, xh%dy_d, xh%dz_d, &
390 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
391 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
392 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
393 xh%w3_d, msh%nelv, xh%lx)
396 xh%dx_d, xh%dy_d, xh%dz_d, &
397 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
398 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
399 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
400 xh%w3_d, msh%nelv, xh%lx)
402 call neko_error(
'No device backend configured')
408 type(coef_t),
intent(in) :: coef
410 type(field_t),
intent(in) :: u, v, w
413 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
414 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
415 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
416 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
417 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
420 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
421 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
422 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
423 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
424 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
427 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
428 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
429 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
430 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
431 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
433 call neko_error(
'No device backend configured')
438 type(coef_t),
intent(in) :: coef
439 real(kind=rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: dtx
440 real(kind=rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(inout) :: x
441 real(kind=rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: dr
442 real(kind=rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: ds
443 real(kind=rp),
dimension(coef%Xh%lxyz, coef%msh%nelv),
intent(in) :: dt
444 type(c_ptr) :: dtx_d, x_d, dr_d, ds_d, dt_d
446 dtx_d = device_get_ptr(dtx)
447 x_d = device_get_ptr(x)
449 dr_d = device_get_ptr(dr)
450 ds_d = device_get_ptr(ds)
451 dt_d = device_get_ptr(dt)
453 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
455 call hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
456 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
459 call cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
460 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
464 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
467 call neko_error(
'No device backend configured')
474 type(space_t),
intent(in) :: xh
475 type(coef_t),
intent(in) :: coef
476 integer,
intent(in) :: nelv, gdim
477 real(kind=rp),
intent(inout) :: du(xh%lxyz, nelv)
478 real(kind=rp),
intent(inout),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u
479 real(kind=rp),
intent(inout),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vx
480 real(kind=rp),
intent(inout),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vy
481 real(kind=rp),
intent(inout),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: vz
482 type(c_ptr) :: du_d, u_d, vx_d, vy_d, vz_d
484 du_d = device_get_ptr(du)
485 u_d = device_get_ptr(u)
487 vx_d = device_get_ptr(vx)
488 vy_d = device_get_ptr(vy)
489 vz_d = device_get_ptr(vz)
491 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
493 call hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
494 xh%dx_d, xh%dy_d, xh%dz_d, &
495 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
496 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
497 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
498 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
500 call cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
501 xh%dx_d, xh%dy_d, xh%dz_d, &
502 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
503 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
504 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
505 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
508 xh%dx_d, xh%dy_d, xh%dz_d, &
509 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
510 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
511 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
512 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
514 call neko_error(
'No device backend configured')
520 subroutine opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh, event)
521 type(field_t),
intent(inout) :: w1
522 type(field_t),
intent(inout) :: w2
523 type(field_t),
intent(inout) :: w3
524 type(field_t),
intent(in) :: u1
525 type(field_t),
intent(in) :: u2
526 type(field_t),
intent(in) :: u3
527 type(field_t),
intent(inout) :: work1
528 type(field_t),
intent(inout) :: work2
529 type(coef_t),
intent(in) :: c_xh
530 type(c_ptr),
optional,
intent(inout) :: event
531 integer :: gdim, n, nelv
538#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
541 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
542 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
543 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
546 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
547 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
548 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
551 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
552 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
553 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
555 if (gdim .eq. 3)
then
558 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
559 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
560 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
563 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
564 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
565 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
568 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
569 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
570 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
572 call device_sub3(w1%x_d, work1%x_d, work2%x_d, n)
574 call device_copy(w1%x_d, work1%x_d, n)
577 if (gdim .eq. 3)
then
580 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
581 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
582 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
584 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
585 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
586 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
589 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
590 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
591 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
593 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
594 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
595 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
598 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
599 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
600 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
602 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
603 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
604 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
606 call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
608 call device_rzero (work1%x_d, n)
611 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
612 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
613 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
616 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
617 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
618 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
621 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
622 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
623 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
625 call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
630 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
631 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
632 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
634 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
635 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
636 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
639 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
640 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
641 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
643 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
644 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
645 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
648 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
649 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
650 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
652 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
653 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
654 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
656 call device_sub3(w3%x_d, work1%x_d, work2%x_d, n)
659 call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%B_d, gdim, n)
661 if (
present(event))
then
662 call c_xh%gs_h%op(w1, gs_op_add, event)
663 call device_event_sync(event)
664 call c_xh%gs_h%op(w2, gs_op_add, event)
665 call device_event_sync(event)
666 call c_xh%gs_h%op(w3, gs_op_add, event)
667 call device_event_sync(event)
669 call c_xh%gs_h%op(w1, gs_op_add)
670 call c_xh%gs_h%op(w2, gs_op_add)
671 call c_xh%gs_h%op(w3, gs_op_add)
674 call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%Binv_d, gdim, n)
677 call neko_error(
'No device backend configured')
682 function opr_device_cfl(dt, u, v, w, Xh, coef, nelv, gdim)
result(cfl)
685 integer :: nelv, gdim
687 real(kind=rp),
dimension(Xh%lx, Xh%ly, Xh%lz, nelv) :: u, v, w
689 type(c_ptr) :: u_d, v_d, w_d
691 u_d = device_get_ptr(u)
692 v_d = device_get_ptr(v)
693 w_d = device_get_ptr(w)
696 cfl =
hip_cfl(dt, u_d, v_d, w_d, &
697 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
698 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
699 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
700 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
701 coef%jacinv_d, nelv, xh%lx)
704 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
705 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
706 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
707 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
708 coef%jacinv_d, nelv, xh%lx)
711 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
712 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
713 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
714 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
715 coef%jacinv_d, nelv, xh%lx)
718 call neko_error(
'No device backend configured')
Return the device pointer for an associated Fortran array.
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction $a = b - c$.
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector $a = b$.
subroutine, public device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n)
Device abstraction, common interface for various accelerators.
subroutine, public device_event_sync(event)
Synchronize an event.
A simulation component that computes lambda2. The values are stored in the field registry under the na...
integer, parameter, public c_rp
integer, parameter, public rp
Global precision used in computations.
Operators accelerator backends.
subroutine, public opr_device_cdtp(dtx, x, dr, ds, dt, coef)
real(kind=rp) function, public opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh, event)
subroutine, public opr_device_dudxyz(du, u, dr, ds, dt, coef)
subroutine, public opr_device_opgrad(ux, uy, uz, u, coef)
subroutine, public opr_device_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
subroutine, public opr_device_lambda2(lambda2, u, v, w, coef)
Defines a function space.
void opencl_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
void cuda_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
real opencl_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
real cuda_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
void opencl_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
void cuda_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
void opencl_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
void cuda_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
void opencl_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
void cuda_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
void opencl_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
void cuda_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Coefficients defined on a given (mesh, $X_h$) tuple. Arrays use indices (i,j,k,e): element e,...
The function space for the SEM solution fields.