46 use,
intrinsic :: iso_c_binding
58 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
59 bind(c, name =
'hip_dudxyz')
60 use,
intrinsic :: iso_c_binding
61 type(c_ptr),
value :: du_d, u_d, dr_d, ds_d, dt_d
62 type(c_ptr),
value :: dx_d, dy_d, dz_d, jacinv_d
63 integer(c_int) :: nel, lx
68 subroutine hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
69 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
70 bind(c, name =
'hip_cdtp')
71 use,
intrinsic :: iso_c_binding
72 type(c_ptr),
value :: dtx_d, x_d, dr_d, ds_d, dt_d
73 type(c_ptr),
value :: dxt_d, dyt_d, dzt_d, w3_d
74 integer(c_int) :: nel, lx
80 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
81 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
82 jacinv_d, nel, gdim, lx) &
83 bind(c, name =
'hip_conv1')
84 use,
intrinsic :: iso_c_binding
85 type(c_ptr),
value :: du_d, u_d, vx_d, vy_d, vz_d
86 type(c_ptr),
value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
87 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
88 type(c_ptr),
value :: jacinv_d
89 integer(c_int) :: nel, gdim, lx
95 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'hip_convect_scalar')
96 use,
intrinsic :: iso_c_binding
97 type(c_ptr),
value :: du_d, u_d
98 type(c_ptr),
value :: cr_d, cs_d, ct_d
99 type(c_ptr),
value :: dx_d, dy_d, dz_d
100 integer(c_int) :: nel, lx
107 drdx_d, dsdx_d, dtdx_d, &
108 drdy_d, dsdy_d, dtdy_d, &
109 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
110 bind(c, name =
'hip_opgrad')
111 use,
intrinsic :: iso_c_binding
112 type(c_ptr),
value :: ux_d, uy_d, uz_d, u_d
113 type(c_ptr),
value :: dx_d, dy_d, dz_d
114 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
115 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
116 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
117 type(c_ptr),
value :: w3_d
118 integer(c_int) :: nel, lx
125 drdx_d, dsdx_d, dtdx_d, &
126 drdy_d, dsdy_d, dtdy_d, &
127 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
128 bind(c, name =
'hip_lambda2')
129 use,
intrinsic :: iso_c_binding
130 type(c_ptr),
value :: lambda2_d, u_d, v_d, w_d
131 type(c_ptr),
value :: dx_d, dy_d, dz_d
132 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
133 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
134 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
135 type(c_ptr),
value :: jacinv_d
136 integer(c_int) :: nel, lx
142 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
143 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
145 bind(c, name =
'hip_cfl')
146 use,
intrinsic :: iso_c_binding
148 type(c_ptr),
value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
149 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
150 type(c_ptr),
value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
152 integer(c_int) :: nel, lx
159 cyc_msk_d, R11_d, R12_d, ncyc, idir) &
160 bind(c, name =
'hip_rotate_cyc')
161 use,
intrinsic :: iso_c_binding
162 type(c_ptr),
value :: vx_d, vy_d, vz_d
163 type(c_ptr),
value :: x_d, y_d, z_d
164 type(c_ptr),
value :: cyc_msk_d, R11_d, R12_d
165 integer(c_int) :: ncyc, idir
171 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
172 dtdz_d, w3_d, nel, lx) bind(c, name = 'hip_set_convect_rst')
173 use,
intrinsic :: iso_c_binding
174 type(c_ptr),
value :: cr_d, cs_d, ct_d
175 type(c_ptr),
value :: cx_d, cy_d, cz_d
176 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
177 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
178 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
179 type(c_ptr),
value :: w3_d
180 integer(c_int) :: nel, lx
186 subroutine cuda_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
187 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
188 bind(c, name =
'cuda_dudxyz')
189 use,
intrinsic :: iso_c_binding
190 type(c_ptr),
value :: du_d, u_d, dr_d, ds_d, dt_d
191 type(c_ptr),
value :: dx_d, dy_d, dz_d, jacinv_d
192 integer(c_int) :: nel, lx
197 subroutine cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
198 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
199 bind(c, name =
'cuda_cdtp')
200 use,
intrinsic :: iso_c_binding
201 type(c_ptr),
value :: dtx_d, x_d, dr_d, ds_d, dt_d
202 type(c_ptr),
value :: dxt_d, dyt_d, dzt_d, w3_d
203 integer(c_int) :: nel, lx
208 subroutine cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
209 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
210 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
211 jacinv_d, nel, gdim, lx) &
212 bind(c, name =
'cuda_conv1')
213 use,
intrinsic :: iso_c_binding
214 type(c_ptr),
value :: du_d, u_d, vx_d, vy_d, vz_d
215 type(c_ptr),
value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
216 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
217 type(c_ptr),
value :: jacinv_d
218 integer(c_int) :: nel, gdim, lx
224 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'cuda_convect_scalar')
225 use,
intrinsic :: iso_c_binding
226 type(c_ptr),
value :: du_d, u_d
227 type(c_ptr),
value :: cr_d, cs_d, ct_d
228 type(c_ptr),
value :: dx_d, dy_d, dz_d
229 integer(c_int) :: nel, lx
236 drdx_d, dsdx_d, dtdx_d, &
237 drdy_d, dsdy_d, dtdy_d, &
238 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
239 bind(c, name =
'cuda_opgrad')
240 use,
intrinsic :: iso_c_binding
241 type(c_ptr),
value :: ux_d, uy_d, uz_d, u_d
242 type(c_ptr),
value :: dx_d, dy_d, dz_d
243 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
244 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
245 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
246 type(c_ptr),
value :: w3_d
247 integer(c_int) :: nel, lx
254 drdx_d, dsdx_d, dtdx_d, &
255 drdy_d, dsdy_d, dtdy_d, &
256 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
257 bind(c, name =
'cuda_lambda2')
258 use,
intrinsic :: iso_c_binding
259 type(c_ptr),
value :: lambda2_d, u_d, v_d, w_d
260 type(c_ptr),
value :: dx_d, dy_d, dz_d
261 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
262 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
263 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
264 type(c_ptr),
value :: jacinv_d
265 integer(c_int) :: nel, lx
270 real(c_rp) function cuda_cfl(dt, u_d, v_d, w_d, &
271 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
272 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
274 bind(c, name =
'cuda_cfl')
275 use,
intrinsic :: iso_c_binding
277 type(c_ptr),
value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
278 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
279 type(c_ptr),
value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
281 integer(c_int) :: nel, lx
288 cyc_msk_d, R11_d, R12_d, ncyc, idir) &
289 bind(c, name =
'cuda_rotate_cyc')
290 use,
intrinsic :: iso_c_binding
291 type(c_ptr),
value :: vx_d, vy_d, vz_d
292 type(c_ptr),
value :: x_d, y_d, z_d
293 type(c_ptr),
value :: cyc_msk_d, R11_d, R12_d
294 integer(c_int) :: ncyc, idir
300 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
301 dtdz_d, w3_d, nel, lx) bind(c, name = 'cuda_set_convect_rst')
302 use,
intrinsic :: iso_c_binding
303 type(c_ptr),
value :: cr_d, cs_d, ct_d
304 type(c_ptr),
value :: cx_d, cy_d, cz_d
305 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
306 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
307 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
308 type(c_ptr),
value :: w3_d
309 integer(c_int) :: nel, lx
316 dx_d, dy_d, dz_d, jacinv_d, nel, lx) &
317 bind(c, name =
'opencl_dudxyz')
318 use,
intrinsic :: iso_c_binding
319 type(c_ptr),
value :: du_d, u_d, dr_d, ds_d, dt_d
320 type(c_ptr),
value :: dx_d, dy_d, dz_d, jacinv_d
321 integer(c_int) :: nel, lx
326 subroutine opencl_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
327 dxt_d, dyt_d, dzt_d, w3_d, nel, lx) &
328 bind(c, name =
'opencl_cdtp')
329 use,
intrinsic :: iso_c_binding
330 type(c_ptr),
value :: dtx_d, x_d, dr_d, ds_d, dt_d
331 type(c_ptr),
value :: dxt_d, dyt_d, dzt_d, w3_d
332 integer(c_int) :: nel, lx
338 dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d, &
339 drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d, &
340 jacinv_d, nel, gdim, lx) &
341 bind(c, name =
'opencl_conv1')
342 use,
intrinsic :: iso_c_binding
343 type(c_ptr),
value :: du_d, u_d, vx_d, vy_d, vz_d
344 type(c_ptr),
value :: dx_d, dy_d, dz_d, drdx_d, dsdx_d, dtdx_d
345 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
346 type(c_ptr),
value :: jacinv_d
347 integer(c_int) :: nel, gdim, lx
353 dx_d, dy_d, dz_d, nel, lx) bind(c, name = 'opencl_convect_scalar')
354 use,
intrinsic :: iso_c_binding
355 type(c_ptr),
value :: du_d, u_d
356 type(c_ptr),
value :: cr_d, cs_d, ct_d
357 type(c_ptr),
value :: dx_d, dy_d, dz_d
358 integer(c_int) :: nel, lx
365 drdx_d, dsdx_d, dtdx_d, &
366 drdy_d, dsdy_d, dtdy_d, &
367 drdz_d, dsdz_d, dtdz_d, w3_d, nel, lx) &
368 bind(c, name =
'opencl_opgrad')
369 use,
intrinsic :: iso_c_binding
370 type(c_ptr),
value :: ux_d, uy_d, uz_d, u_d
371 type(c_ptr),
value :: dx_d, dy_d, dz_d
372 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
373 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
374 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
375 type(c_ptr),
value :: w3_d
376 integer(c_int) :: nel, lx
381 real(c_rp) function opencl_cfl(dt, u_d, v_d, w_d, &
382 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, &
383 drdz_d, dsdz_d, dtdz_d, dr_inv_d, ds_inv_d, dt_inv_d, &
385 bind(c, name =
'opencl_cfl')
386 use,
intrinsic :: iso_c_binding
388 type(c_ptr),
value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
389 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
390 type(c_ptr),
value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
392 integer(c_int) :: nel, lx
399 drdx_d, dsdx_d, dtdx_d, &
400 drdy_d, dsdy_d, dtdy_d, &
401 drdz_d, dsdz_d, dtdz_d, jacinv_d, nel, lx) &
402 bind(c, name =
'opencl_lambda2')
403 use,
intrinsic :: iso_c_binding
404 type(c_ptr),
value :: lambda2_d, u_d, v_d, w_d
405 type(c_ptr),
value :: dx_d, dy_d, dz_d
406 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
407 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
408 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
409 type(c_ptr),
value :: jacinv_d
410 integer(c_int) :: nel, lx
417 cyc_msk_d, R11_d, R12_d, ncyc, idir) &
418 bind(c, name =
'opencl_rotate_cyc')
419 use,
intrinsic :: iso_c_binding
420 type(c_ptr),
value :: vx_d, vy_d, vz_d
421 type(c_ptr),
value :: x_d, y_d, z_d
422 type(c_ptr),
value :: cyc_msk_d, R11_d, R12_d
423 integer(c_int) :: ncyc, idir
429 drdx_d, dsdx_d, dtdx_d, drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, &
430 dtdz_d, w3_d, nel, lx) bind(c, name = 'opencl_set_convect_rst')
431 use,
intrinsic :: iso_c_binding
432 type(c_ptr),
value :: cr_d, cs_d, ct_d
433 type(c_ptr),
value :: cx_d, cy_d, cz_d
434 type(c_ptr),
value :: drdx_d, dsdx_d, dtdx_d
435 type(c_ptr),
value :: drdy_d, dsdy_d, dtdy_d
436 type(c_ptr),
value :: drdz_d, dsdz_d, dtdz_d
437 type(c_ptr),
value :: w3_d
438 integer(c_int) :: nel, lx
447 type(
coef_t),
intent(in),
target :: coef
448 type(c_ptr),
intent(inout) :: du_d
449 type(c_ptr),
intent(in) :: u_d, dr_d, ds_d, dt_d
451 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
453 call hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
454 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
458 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
462 xh%dx_d, xh%dy_d, xh%dz_d, coef%jacinv_d, &
465 call neko_error(
'No device backend configured')
472 type(
coef_t),
intent(in),
target :: coef
473 type(c_ptr),
intent(inout) :: ux_d, uy_d, uz_d
474 type(c_ptr),
intent(in) :: u_d
476 associate(xh => coef%Xh, msh => coef%msh)
479 xh%dx_d, xh%dy_d, xh%dz_d, &
480 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
481 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
482 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
483 xh%w3_d, msh%nelv, xh%lx)
486 xh%dx_d, xh%dy_d, xh%dz_d, &
487 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
488 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
489 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
490 xh%w3_d, msh%nelv, xh%lx)
493 xh%dx_d, xh%dy_d, xh%dz_d, &
494 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
495 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
496 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
497 xh%w3_d, msh%nelv, xh%lx)
499 call neko_error(
'No device backend configured')
511 integer,
intent(in) :: n
512 integer(kind=i8),
intent(in) :: glb_n_points
513 type(c_ptr),
intent(inout) :: x_d
522 type(
coef_t),
intent(in) :: coef
523 type(c_ptr),
intent(inout) :: lambda2_d
524 type(c_ptr),
intent(in) :: u_d, v_d, w_d
527 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
528 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
529 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
530 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
531 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
534 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
535 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
536 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
537 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
538 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
541 coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
542 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
543 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
544 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
545 coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
547 call neko_error(
'No device backend configured')
552 type(
coef_t),
intent(in),
target :: coef
553 type(c_ptr),
intent(inout) :: dtx_d, x_d
554 type(c_ptr),
intent(in) :: dr_d, ds_d, dt_d
556 associate(xh => coef%Xh, msh => coef%msh, dof => coef%dof)
558 call hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
559 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
562 call cuda_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
563 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
567 xh%dxt_d, xh%dyt_d, xh%dzt_d, xh%w3_d, &
570 call neko_error(
'No device backend configured')
577 type(
space_t),
intent(in) :: xh
578 type(
coef_t),
intent(in),
target :: coef
579 integer,
intent(in) :: nelv, gdim
580 type(c_ptr),
intent(inout) :: du_d
581 type(c_ptr),
intent(in) :: u_d, vx_d, vy_d, vz_d
583 associate(msh => coef%msh, dof => coef%dof)
585 call hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
586 xh%dx_d, xh%dy_d, xh%dz_d, &
587 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
588 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
589 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
590 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
592 call cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
593 xh%dx_d, xh%dy_d, xh%dz_d, &
594 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
595 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
596 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
597 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
600 xh%dx_d, xh%dy_d, xh%dz_d, &
601 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
602 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
603 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
604 coef%jacinv_d, msh%nelv, msh%gdim, xh%lx)
606 call neko_error(
'No device backend configured')
613 Xh_GLL, Xh_GL, coef_GLL, coef_GL, GLL_to_GL)
614 type(space_t),
intent(in) :: xh_gl
615 type(space_t),
intent(in) :: xh_gll
616 type(coef_t),
intent(in) :: coef_gll
617 type(coef_t),
intent(in) :: coef_gl
618 type(interpolator_t),
intent(inout) :: gll_to_gl
619 real(kind=rp),
intent(inout) :: &
620 du(xh_gll%lx, xh_gll%ly, xh_gll%lz, coef_gl%msh%nelv)
621 type(c_ptr) :: cr_d, cs_d, ct_d, u_d
622 real(kind=rp) :: ud(xh_gl%lx*xh_gl%lx*xh_gl%lx)
623 type(c_ptr) :: du_d, ud_d
624 integer :: n_gl, n_gll
! Total point counts on the two spaces. n_gl sizes the device mapping of the
! scratch array ud; n_gll is the length used for the gather-scatter and the
! Binv scaling of du. (Previously n_gll was assigned twice and n_gl was left
! undefined before being passed to device_map.)
626 n_gl = coef_gl%msh%nelv * xh_gl%lxyz
627 n_gll = coef_gl%msh%nelv * xh_gll%lxyz
629 call device_map(ud, ud_d, n_gl)
631 du_d = device_get_ptr(du)
633 associate(xh => xh_gl, nelv => coef_gl%msh%nelv, lx => xh_gl%lx)
636 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
639 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
642 xh%dx_d, xh%dy_d, xh%dz_d, nelv, lx)
644 call neko_error(
'No device backend configured')
647 call gll_to_gl%map(du, ud, nelv, xh_gll)
648 call coef_gll%gs_h%op(du, n_gll, gs_op_add)
649 call device_col2(du_d, coef_gll%Binv_d, n_gll)
653 call device_free(ud_d)
658 type(field_t),
intent(inout) :: w1
659 type(field_t),
intent(inout) :: w2
660 type(field_t),
intent(inout) :: w3
661 type(field_t),
intent(in) :: u1
662 type(field_t),
intent(in) :: u2
663 type(field_t),
intent(in) :: u3
664 type(field_t),
intent(inout) :: work1
665 type(field_t),
intent(inout) :: work2
666 type(coef_t),
intent(in) :: c_xh
667 type(c_ptr),
optional,
intent(inout) :: event
668 integer :: gdim, n, nelv
675#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
678 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
679 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
680 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
683 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
684 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
685 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
688 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
689 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
690 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
692 if (gdim .eq. 3)
then
695 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
696 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
697 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
700 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
701 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
702 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
705 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
706 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
707 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
709 call device_sub3(w1%x_d, work1%x_d, work2%x_d, n)
711 call device_copy(w1%x_d, work1%x_d, n)
714 if (gdim .eq. 3)
then
717 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
718 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
719 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
721 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
722 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
723 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
726 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
727 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
728 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
730 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
731 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
732 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
735 c_xh%drdz_d, c_xh%dsdz_d, c_xh%dtdz_d,&
736 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
737 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
739 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
740 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
741 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
743 call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
745 call device_rzero (work1%x_d, n)
748 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
749 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
750 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
753 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
754 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
755 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
758 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
759 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
760 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
762 call device_sub3(w2%x_d, work1%x_d, work2%x_d, n)
767 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
768 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
769 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
771 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
772 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
773 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
776 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
777 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
778 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
780 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
781 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
782 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
785 c_xh%drdx_d, c_xh%dsdx_d, c_xh%dtdx_d,&
786 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
787 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
789 c_xh%drdy_d, c_xh%dsdy_d, c_xh%dtdy_d,&
790 c_xh%Xh%dx_d, c_xh%Xh%dy_d, c_xh%Xh%dz_d, &
791 c_xh%jacinv_d, nelv, c_xh%Xh%lx)
793 call device_sub3(w3%x_d, work1%x_d, work2%x_d, n)
796 call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%B_d, gdim, n)
798 if (
present(event))
then
800 call c_xh%gs_h%op(w1, gs_op_add, event)
801 call device_event_sync(event)
802 call c_xh%gs_h%op(w2, gs_op_add, event)
803 call device_event_sync(event)
804 call c_xh%gs_h%op(w3, gs_op_add, event)
805 call device_event_sync(event)
809 call c_xh%gs_h%op(w1, gs_op_add)
810 call c_xh%gs_h%op(w2, gs_op_add)
811 call c_xh%gs_h%op(w3, gs_op_add)
815 call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_xh%Binv_d, gdim, n)
818 call neko_error(
'No device backend configured')
826 integer :: nelv, gdim
828 type(c_ptr),
intent(in) :: u_d, v_d, w_d
832 cfl =
hip_cfl(dt, u_d, v_d, w_d, &
833 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
834 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
835 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
836 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
837 coef%jacinv_d, nelv, xh%lx)
840 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
841 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
842 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
843 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
844 coef%jacinv_d, nelv, xh%lx)
847 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
848 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
849 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
850 xh%dr_inv_d, xh%ds_inv_d, xh%dt_inv_d, &
851 coef%jacinv_d, nelv, xh%lx)
854 call neko_error(
'No device backend configured')
859 type(c_ptr),
intent(inout) :: vx_d, vy_d, vz_d
860 integer,
intent(in) :: idir
861 type(coef_t),
intent(in) :: coef
864 ncyc = coef%cyc_msk(0) - 1
866 if (ncyc .le. 0)
return
870 coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
871 coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
875 coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
876 coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
880 coef%dof%x_d, coef%dof%y_d, coef%dof%z_d, &
881 coef%cyc_msk_d, coef%R11_d, coef%R12_d, &
884 call neko_error(
'No device backend configured for rotate_cyc')
890 type(space_t),
intent(inout) :: xh
891 type(coef_t),
intent(inout) :: coef
892 type(c_ptr),
intent(inout) :: cr_d, cs_d, ct_d, cx_d, cy_d, cz_d
896 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
897 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
898 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
899 xh%w3_d, coef%msh%nelv, xh%lx)
902 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
903 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
904 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
905 xh%w3_d, coef%msh%nelv, xh%lx)
908 coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
909 coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
910 coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
911 xh%w3_d, coef%msh%nelv, xh%lx)
913 call neko_error(
'No device backend configured')
Return the device pointer for an associated Fortran array.
Map a Fortran array to a device (allocate and associate)
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction $a = b - c$.
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector $a = b$.
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication $a = a \cdot b$.
subroutine, public device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n)
Device abstraction, common interface for various accelerators.
subroutine, public device_event_sync(event)
Synchronize an event.
subroutine, public device_free(x_d)
Deallocate memory on the device.
Routines to interpolate between different spaces.
integer, parameter, public i8
integer, parameter, public c_rp
integer, parameter, public rp
Global precision used in computations.
Operators accelerator backends.
subroutine, public opr_device_convect_scalar(du, u_d, cr_d, cs_d, ct_d, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
subroutine, public opr_device_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, coef)
subroutine, public opr_device_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, coef)
real(kind=rp) function, public opr_device_cfl(dt, u_d, v_d, w_d, xh, coef, nelv, gdim)
subroutine, public opr_device_conv1(du_d, u_d, vx_d, vy_d, vz_d, xh, coef, nelv, gdim)
subroutine, public device_ortho(x_d, glb_n_points, n)
Orthogonalize with regard to vector (1,1,1,1,1,1...,1)^T.
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh, event)
subroutine, public opr_device_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, xh, coef)
subroutine, public opr_device_lambda2(lambda2_d, u_d, v_d, w_d, coef)
subroutine, public opr_device_rotate_cyc(vx_d, vy_d, vz_d, idir, coef)
subroutine, public opr_device_opgrad(ux_d, uy_d, uz_d, u_d, coef)
Defines a function space.
void opencl_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
void cuda_cdtp(void *dtx, void *x, void *dr, void *ds, void *dt, void *dxt, void *dyt, void *dzt, void *w3, int *nel, int *lx)
real opencl_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
real cuda_cfl(real *dt, void *u, void *v, void *w, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *dr_inv, void *ds_inv, void *dt_inv, void *jacinv, int *nel, int *lx)
void opencl_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
void cuda_conv1(void *du, void *u, void *vx, void *vy, void *vz, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *gdim, int *lx)
void opencl_convect_scalar(void *du, void *u, void *cr, void *cs, void *ct, void *dx, void *dy, void *dz, int *nel, int *lx)
void cuda_convect_scalar(void *du, void *u, void *cr, void *cs, void *ct, void *dx, void *dy, void *dz, int *nel, int *lx)
void opencl_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
void cuda_dudxyz(void *du, void *u, void *dr, void *ds, void *dt, void *dx, void *dy, void *dz, void *jacinv, int *nel, int *lx)
void opencl_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
void cuda_lambda2(void *lambda2, void *u, void *v, void *w, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *jacinv, int *nel, int *lx)
void opencl_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
void cuda_opgrad(void *ux, void *uy, void *uz, void *u, void *dx, void *dy, void *dz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
void opencl_rotate_cyc(void *vx, void *vy, void *vz, void *x, void *y, void *z, void *cyc_msk, void *R11, void *R12, int *ncyc, int *idir)
void cuda_rotate_cyc(void *vx, void *vy, void *vz, void *x, void *y, void *z, void *cyc_msk, void *R11, void *R12, int *ncyc, int *idir)
void opencl_set_convect_rst(void *cr, void *cs, void *ct, void *cx, void *cy, void *cz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
void cuda_set_convect_rst(void *cr, void *cs, void *ct, void *cx, void *cy, void *cz, void *drdx, void *dsdx, void *dtdx, void *drdy, void *dsdy, void *dtdy, void *drdz, void *dsdz, void *dtdz, void *w3, int *nel, int *lx)
Coefficients defined on a given (mesh, $X_h$) tuple. Arrays use indices (i,j,k,e): element e,...
Interpolation between two space::space_t.
The function space for the SEM solution fields.