34 use,
intrinsic :: iso_c_binding, only: c_ptr, c_int
38 use mpi_f08,
only : mpi_sum, mpi_in_place, mpi_allreduce
73 type(c_ptr) :: a_d, b_d
75 type(c_ptr),
optional :: strm
80 if (
present(strm))
then
93 call neko_error(
'no device backend configured')
99 type(c_ptr) :: a_d, b_d, mask_d
101 type(c_ptr),
optional :: strm
104 if (n .lt. 1 .or. n_mask .lt. 1)
return
106 if (
present(strm))
then
119 call neko_error(
'no device backend configured')
125 type(c_ptr) :: a_d, b_d, mask_d
127 type(c_ptr),
optional :: strm
130 if (n .lt. 1 .or. n_mask .lt. 1)
return
132 if (
present(strm))
then
145 call neko_error(
'no device backend configured')
151 type(c_ptr) :: a_d, b_d, mask_d
153 type(c_ptr),
optional :: strm
156 if (n .lt. 1 .or. n_mask .lt. 1)
return
158 if (
present(strm))
then
171 call neko_error(
'no device backend configured')
176 type(c_ptr) :: a_d, b_d, mask_d
178 type(c_ptr),
optional :: strm
181 if (n .lt. 1 .or. n_mask .lt. 1)
return
183 if (
present(strm))
then
194 call neko_error(
'No OpenCL bcknd, masked atomic reduction')
196 call neko_error(
'no device backend configured')
204 real(kind=
rp),
intent(in) :: c
206 type(c_ptr) :: mask_d
208 type(c_ptr),
optional :: strm
211 if (n .lt. 1 .or. n_mask .lt. 1)
return
213 if (
present(strm))
then
226 call neko_error(
'No device backend configured')
234 type(c_ptr),
optional :: strm
239 if (
present(strm))
then
252 call neko_error(
'No device backend configured')
260 type(c_ptr),
optional :: strm
262 real(kind=
rp),
parameter :: one = 1.0_rp
266 if (
present(strm))
then
272#if HAVE_HIP || HAVE_CUDA || HAVE_OPENCL
275 call neko_error(
'No device backend configured')
282 real(kind=
rp),
intent(in) :: c
284 type(c_ptr),
optional :: strm
289 if (
present(strm))
then
302 call neko_error(
'No device backend configured')
308 type(c_ptr) :: a_d, b_d
309 real(kind=
rp),
intent(in) :: c
311 type(c_ptr),
optional :: strm
316 if (
present(strm))
then
329 call neko_error(
'No device backend configured')
336 real(kind=
rp),
intent(in) :: c
338 type(c_ptr),
optional :: strm
341 if (
present(strm))
then
354 call neko_error(
'No device backend configured')
360 type(c_ptr) :: a_d, b_d
361 real(kind=
rp),
intent(in) :: c
363 type(c_ptr),
optional :: strm
366 if (
present(strm))
then
379 call neko_error(
'No device backend configured')
386 real(kind=
rp),
intent(in) :: c
388 type(c_ptr),
optional :: strm
393 if (
present(strm))
then
406 call neko_error(
'No device backend configured')
414 real(kind=
rp),
intent(in) :: c
416 type(c_ptr),
optional :: strm
421 if (
present(strm))
then
434 call neko_error(
'No device backend configured')
441 real(kind=
rp),
intent(in) :: c
443 type(c_ptr),
optional ::strm
448 if (
present(strm))
then
461 call neko_error(
'No device backend configured')
467 type(c_ptr) :: a_d, b_d
469 type(c_ptr),
optional :: strm
474 if (
present(strm))
then
487 call neko_error(
'No device backend configured')
492 type(c_ptr) :: a_d, b_d, c_d, d_d
494 type(c_ptr),
optional :: strm
499 if (
present(strm))
then
506 call hip_add4(a_d, b_d, c_d, d_d, n, strm_)
508 call cuda_add4(a_d, b_d, c_d, d_d, n, strm_)
512 call neko_error(
'No device backend configured')
517 type(c_ptr) :: a_d, b_d
520 type(c_ptr),
optional :: strm
525 if (
present(strm))
then
538 call neko_error(
'No device backend configured')
545 type(c_ptr) :: a_d, b_d
548 type(c_ptr),
optional :: strm
553 if (
present(strm))
then
566 call neko_error(
'No device backend configured')
572 type(c_ptr) :: a_d, b_d
575 type(c_ptr),
optional :: strm
580 if (
present(strm))
then
593 call neko_error(
'No device backend configured')
599 type(c_ptr) :: a_d, b_d, c_d
601 type(c_ptr),
optional :: strm
606 if (
present(strm))
then
613 call hip_add3(a_d, b_d, c_d, n, strm_)
619 call neko_error(
'No device backend configured')
625 type(c_ptr) :: a_d, b_d, c_d
626 real(kind=
rp) :: c1, c2
628 type(c_ptr),
optional :: strm
633 if (
present(strm))
then
640 call hip_add3s2(a_d, b_d, c_d, c1, c2, n, strm_)
646 call neko_error(
'No device backend configured')
652 type(c_ptr) :: a_d, b_d, c_d, d_d
653 real(kind=
rp) :: c1, c2, c3
655 type(c_ptr),
optional :: strm
660 if (
present(strm))
then
667 call hip_add4s3(a_d, b_d, c_d, d_d, c1, c2, c3, n, strm_)
669 call cuda_add4s3(a_d, b_d, c_d, d_d, c1, c2, c3, n, strm_)
673 call neko_error(
'No device backend configured')
678 subroutine device_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2 , c3, c4, n, strm)
679 type(c_ptr) :: a_d, b_d, c_d, d_d, e_d
680 real(kind=
rp) :: c1, c2, c3, c4
682 type(c_ptr),
optional :: strm
687 if (
present(strm))
then
694 call hip_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm_)
696 call cuda_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm_)
698 call opencl_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm_)
700 call neko_error(
'No device backend configured')
708 type(c_ptr),
optional :: strm
713 if (
present(strm))
then
726 call neko_error(
'No device backend configured')
732 type(c_ptr) :: a_d, b_d
734 type(c_ptr),
optional :: strm
739 if (
present(strm))
then
752 call neko_error(
'No device backend configured')
758 type(c_ptr) :: a_d, b_d, c_d
760 type(c_ptr),
optional :: strm
763 if (
present(strm))
then
775 call neko_error(
'opencl_invcol3 not implemented')
777 call neko_error(
'No device backend configured')
783 type(c_ptr) :: a_d, b_d
785 type(c_ptr),
optional :: strm
788 if (
present(strm))
then
802 call neko_error(
'No device backend configured')
808 type(c_ptr) :: a_d, b_d, c_d
810 type(c_ptr),
optional :: strm
815 if (
present(strm))
then
822 call hip_col3(a_d, b_d, c_d, n, strm_)
828 call neko_error(
'No device backend configured')
834 type(c_ptr) :: a_d, b_d, c_d
836 type(c_ptr),
optional :: strm
841 if (
present(strm))
then
854 call neko_error(
'No device backend configured')
860 type(c_ptr) :: a_d, b_d
862 type(c_ptr),
optional :: strm
867 if (
present(strm))
then
880 call neko_error(
'No device backend configured')
886 type(c_ptr) :: a_d, b_d, c_d
888 type(c_ptr),
optional :: strm
893 if (
present(strm))
then
900 call hip_sub3(a_d, b_d, c_d, n, strm_)
906 call neko_error(
'No device backend configured')
912 type(c_ptr) :: a_d, b_d, c_d
914 type(c_ptr),
optional :: strm
919 if (
present(strm))
then
932 call neko_error(
'No device backend configured')
938 type(c_ptr) :: a_d, b_d, c_d, d_d
940 type(c_ptr),
optional :: strm
945 if (
present(strm))
then
958 call neko_error(
'No device backend configured')
964 type(c_ptr) :: a_d, b_d, c_d
967 type(c_ptr),
optional :: strm
972 if (
present(strm))
then
985 call neko_error(
'No device backend configured')
991 subroutine device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm)
992 type(c_ptr) :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
994 type(c_ptr),
optional :: strm
999 if (
present(strm))
then
1006 call hip_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm_)
1008 call cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm_)
1010 call opencl_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm_)
1012 call neko_error(
'No device backend configured')
1019 w1_d, w2_d, w3_d, n, strm)
1020 type(c_ptr) :: u1_d, u2_d, u3_d
1021 type(c_ptr) :: v1_d, v2_d, v3_d
1022 type(c_ptr) :: w1_d, w2_d, w3_d
1024 type(c_ptr),
optional :: strm
1025 type(c_ptr) :: strm_
1027 if (n .lt. 1)
return
1029 if (
present(strm))
then
1036 call hip_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
1037 w1_d, w2_d, w3_d, n, strm_)
1039 call cuda_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
1040 w1_d, w2_d, w3_d, n, strm_)
1043 w1_d, w2_d, w3_d, n, strm_)
1045 call neko_error(
'No device backend configured')
1052 type(c_ptr) :: u_d, v_d, w_d
1054 type(c_ptr),
optional :: strm
1055 type(c_ptr) :: strm_
1056 real(kind=
rp) :: res
1060 if (n .lt. 1)
return
1062 if (
present(strm))
then
1069 res =
hip_vlsc3(u_d, v_d, w_d, n, strm_)
1076 call neko_error(
'No device backend configured')
1082 type(c_ptr) :: a_d, b_d, c_d
1084 type(c_ptr),
optional :: strm
1085 type(c_ptr) :: strm_
1086 real(kind=
rp) :: res
1088 if (
present(strm))
then
1096 res =
hip_glsc3(a_d, b_d, c_d, n, strm_)
1102 call neko_error(
'No device backend configured')
1105#ifndef HAVE_DEVICE_MPI
1107 call mpi_allreduce(mpi_in_place, res, 1, &
1114 type(c_ptr),
value :: w_d, v_d_d, mult_d
1115 integer(c_int) :: j, n
1117 type(c_ptr),
optional :: strm
1118 type(c_ptr) :: strm_
1121 if (
present(strm))
then
1134 call neko_error(
'No device backend configured')
1137#ifndef HAVE_DEVICE_MPI
1139 call mpi_allreduce(mpi_in_place, h, j, &
1146 type(c_ptr),
value :: y_d, x_d_d, a_d
1147 integer(c_int) :: j, n
1148 type(c_ptr),
optional :: strm
1149 type(c_ptr) :: strm_
1151 if (n .lt. 1)
return
1153 if (
present(strm))
then
1166 call neko_error(
'No device backend configured')
1172 type(c_ptr) :: a_d, b_d
1174 real(kind=
rp) :: res
1175 type(c_ptr),
optional :: strm
1176 type(c_ptr) :: strm_
1178 if (
present(strm))
then
1192 call neko_error(
'No device backend configured')
1195#ifndef HAVE_DEVICE_MPI
1197 call mpi_allreduce(mpi_in_place, res, 1, &
1206 type(c_ptr),
intent(in) :: a_d, b_d
1207 integer,
intent(in) :: n
1209 real(kind=
rp) :: res
1210 type(c_ptr),
optional :: strm
1211 type(c_ptr) :: strm_
1213 if (
present(strm))
then
1227 call neko_error(
'No device backend configured')
1230#ifndef HAVE_DEVICE_MPI
1232 call mpi_allreduce(mpi_in_place, res, 1, &
1244 real(kind=
rp) :: res
1245 type(c_ptr),
optional :: strm
1246 type(c_ptr) :: strm_
1248 if (
present(strm))
then
1262 call neko_error(
'No device backend configured')
1265#ifndef HAVE_DEVICE_MPI
1267 call mpi_allreduce(mpi_in_place, res, 1, &
1274 integer,
intent(in) :: n
1276 type(c_ptr),
optional :: strm
1277 type(c_ptr) :: strm_
1279 if (n .lt. 1)
return
1281 if (
present(strm))
then
1292 call neko_error(
'OPENCL is not implemented for device_absval')
1294 call neko_error(
'No device backend configured')
1305 type(c_ptr) :: a_d, b_d
1307 type(c_ptr),
optional :: strm
1308 type(c_ptr) :: strm_
1310 if (n .lt. 1)
return
1312 if (
present(strm))
then
1323 call neko_error(
'No OpenCL backend for device_pwmax2')
1325 call neko_error(
'No device backend configured')
1332 type(c_ptr) :: a_d, b_d, c_d
1334 type(c_ptr),
optional :: strm
1335 type(c_ptr) :: strm_
1337 if (n .lt. 1)
return
1339 if (
present(strm))
then
1350 call neko_error(
'No OpenCL backend for device_pwmax3')
1352 call neko_error(
'No device backend configured')
1361 real(kind=
rp),
intent(in) :: c
1363 type(c_ptr),
optional :: strm
1364 type(c_ptr) :: strm_
1366 if (n .lt. 1)
return
1368 if (
present(strm))
then
1379 call neko_error(
'No OpenCL backend for device_cpwmax2')
1381 call neko_error(
'No device backend configured')
1389 type(c_ptr) :: a_d, b_d
1390 real(kind=
rp),
intent(in) :: c
1392 type(c_ptr),
optional :: strm
1393 type(c_ptr) :: strm_
1395 if (n .lt. 1)
return
1397 if (
present(strm))
then
1408 call neko_error(
'No OpenCL backend for device_cpwmax3')
1410 call neko_error(
'No device backend configured')
1421 type(c_ptr) :: a_d, b_d
1423 type(c_ptr),
optional :: strm
1424 type(c_ptr) :: strm_
1426 if (n .lt. 1)
return
1428 if (
present(strm))
then
1439 call neko_error(
'No OpenCL backend for device_pwmin2')
1441 call neko_error(
'No device backend configured')
1448 type(c_ptr) :: a_d, b_d, c_d
1450 type(c_ptr),
optional :: strm
1451 type(c_ptr) :: strm_
1453 if (n .lt. 1)
return
1455 if (
present(strm))
then
1466 call neko_error(
'No OpenCL backend for device_pwmin3')
1468 call neko_error(
'No device backend configured')
1477 real(kind=
rp),
intent(in) :: c
1479 type(c_ptr),
optional :: strm
1480 type(c_ptr) :: strm_
1482 if (n .lt. 1)
return
1484 if (
present(strm))
then
1495 call neko_error(
'No OpenCL backend for device_cpwmin2')
1497 call neko_error(
'No device backend configured')
1505 type(c_ptr) :: a_d, b_d
1506 real(kind=
rp),
intent(in) :: c
1508 type(c_ptr),
optional :: strm
1509 type(c_ptr) :: strm_
1511 if (n .lt. 1)
return
1513 if (
present(strm))
then
1524 call neko_error(
'No OpenCL backend for device_cpwmin3')
1526 call neko_error(
'No device backend configured')
1536 type(c_ptr),
intent(inout) :: a_d
1537 integer,
intent(in) :: c
1538 integer,
intent(in) :: n
1539 type(c_ptr),
optional :: strm
1540 type(c_ptr) :: strm_
1541 if (n .lt. 1)
return
1543 if (
present(strm))
then
1556 call neko_error(
'No device backend configured')
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
integer, public pe_size
MPI size of communicator.
type(mpi_comm), public neko_comm
MPI communicator.
subroutine, public device_pwmin2(a_d, b_d, n, strm)
Compute the point-wise minimum of two vectors .
subroutine, public device_add2s1(a_d, b_d, c1, n, strm)
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n, strm)
subroutine, public device_add4s3(a_d, b_d, c_d, d_d, c1, c2, c3, n, strm)
Returns .
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction .
subroutine, public device_masked_scatter_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
Scatter a masked vector .
subroutine, public device_add2s2(a_d, b_d, c1, n, strm)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_addcol3(a_d, b_d, c_d, n, strm)
Returns .
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_pwmax3(a_d, b_d, c_d, n, strm)
Compute the point-wise maximum of two vectors .
subroutine, public device_invcol1(a_d, n, strm)
Invert a vector .
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n, strm)
Returns .
subroutine, public device_masked_atomic_reduction_0(a_d, b_d, mask_d, n, n_mask, strm)
subroutine, public device_cpwmax3(a_d, b_d, c, n, strm)
Compute the point-wise maximum of a vector and a scalar .
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_rone(a_d, n, strm)
Set all elements to one.
subroutine, public device_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, w1_d, w2_d, w3_d, n, strm)
Compute a cross product (3-d version) assuming vector components etc.
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm)
Compute a dot product (3-d version) assuming vector components etc.
real(kind=rp) function, public device_glsubnorm(a_d, b_d, n, strm)
Returns the norm of the difference of two vectors .
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n, strm)
subroutine, public device_cfill_mask(a_d, c, n, mask_d, n_mask, strm)
Fill a constant to a masked vector. .
subroutine, public device_cadd2(a_d, b_d, c, n, strm)
Add a scalar to vector .
subroutine, public device_sub2(a_d, b_d, n, strm)
Vector substraction .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
subroutine, public device_invcol3(a_d, b_d, c_d, n, strm)
Vector division .
subroutine, public device_pwmin3(a_d, b_d, c_d, n, strm)
Compute the point-wise minimum of two vectors .
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n, strm)
Compute multiplication sum .
subroutine, public device_cdiv2(a_d, b_d, c, n, strm)
Division of constant c by array .
subroutine, public device_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm)
Returns .
subroutine, public device_add4(a_d, b_d, c_d, d_d, n, strm)
subroutine, public device_cpwmax2(a_d, c, n, strm)
Compute the point-wise maximum of a vector and a scalar .
subroutine, public device_masked_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
Copy a masked vector .
subroutine, public device_subcol3(a_d, b_d, c_d, n, strm)
Returns .
subroutine, public device_cdiv(a_d, c, n, strm)
Division of constant c by array .
subroutine, public device_absval(a_d, n, strm)
subroutine device_iadd(a_d, c, n, strm)
Add an integer scalar to vector .
subroutine, public device_masked_gather_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
Gather a masked vector .
subroutine, public device_invcol2(a_d, b_d, n, strm)
Vector division .
subroutine, public device_addsqr2s2(a_d, b_d, c1, n, strm)
Returns .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
subroutine, public device_cpwmin3(a_d, b_d, c, n, strm)
Compute the point-wise minimum of a vector and a scalar .
real(kind=rp) function, public device_glsc2(a_d, b_d, n, strm)
Weighted inner product .
subroutine, public device_cmult2(a_d, b_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_col3(a_d, b_d, c_d, n, strm)
Vector multiplication with 3 vectors .
subroutine, public device_addcol4(a_d, b_d, c_d, d_d, n, strm)
Returns .
subroutine, public device_cfill(a_d, c, n, strm)
Set all elements to a constant c .
subroutine, public device_cpwmin2(a_d, c, n, strm)
Compute the point-wise minimum of a vector and a scalar .
subroutine, public device_add3(a_d, b_d, c_d, n, strm)
Vector addition .
subroutine, public device_addcol3s2(a_d, b_d, c_d, s, n, strm)
Returns .
subroutine, public device_pwmax2(a_d, b_d, n, strm)
Compute the point-wise maximum of two vectors .
subroutine device_radd(a_d, c, n, strm)
Add a scalar to vector .
Device abstraction, common interface for various accelerators.
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
integer, parameter, public c_rp
integer, parameter, public rp
Global precision used in computations.