34 use,
intrinsic :: iso_c_binding, only: c_ptr, c_int
38 use mpi_f08,
only : mpi_sum, mpi_in_place, mpi_allreduce
74 type(c_ptr) :: a_d, b_d
76 type(c_ptr),
optional :: strm
81 if (
present(strm))
then
94 call neko_error(
'no device backend configured')
100 type(c_ptr) :: a_d, b_d, mask_d
102 type(c_ptr),
optional :: strm
105 if (n .lt. 1 .or. n_mask .lt. 1)
return
107 if (
present(strm))
then
120 call neko_error(
'no device backend configured')
126 type(c_ptr) :: a_d, b_d, mask_d
128 type(c_ptr),
optional :: strm
131 if (n .lt. 1 .or. n_mask .lt. 1)
return
133 if (
present(strm))
then
146 call neko_error(
'no device backend configured')
153 type(c_ptr) :: a_d, b_d, mask_d
155 type(c_ptr),
optional :: strm
158 if (n .lt. 1 .or. n_mask .lt. 1)
return
160 if (
present(strm))
then
173 call neko_error(
'no device backend configured')
179 type(c_ptr) :: a_d, b_d, mask_d
181 type(c_ptr),
optional :: strm
184 if (n .lt. 1 .or. n_mask .lt. 1)
return
186 if (
present(strm))
then
199 call neko_error(
'no device backend configured')
204 type(c_ptr) :: a_d, b_d, mask_d
206 type(c_ptr),
optional :: strm
209 if (n .lt. 1 .or. n_mask .lt. 1)
return
211 if (
present(strm))
then
222 call neko_error(
'No OpenCL bcknd, masked atomic reduction')
224 call neko_error(
'no device backend configured')
232 real(kind=
rp),
intent(in) :: c
234 type(c_ptr) :: mask_d
236 type(c_ptr),
optional :: strm
239 if (n .lt. 1 .or. n_mask .lt. 1)
return
241 if (
present(strm))
then
254 call neko_error(
'No device backend configured')
262 type(c_ptr),
optional :: strm
267 if (
present(strm))
then
280 call neko_error(
'No device backend configured')
288 type(c_ptr),
optional :: strm
290 real(kind=
rp),
parameter :: one = 1.0_rp
294 if (
present(strm))
then
300#if HAVE_HIP || HAVE_CUDA || HAVE_OPENCL
303 call neko_error(
'No device backend configured')
310 real(kind=
rp),
intent(in) :: c
312 type(c_ptr),
optional :: strm
317 if (
present(strm))
then
330 call neko_error(
'No device backend configured')
336 type(c_ptr) :: a_d, b_d
337 real(kind=
rp),
intent(in) :: c
339 type(c_ptr),
optional :: strm
344 if (
present(strm))
then
357 call neko_error(
'No device backend configured')
364 real(kind=
rp),
intent(in) :: c
366 type(c_ptr),
optional :: strm
369 if (
present(strm))
then
382 call neko_error(
'No device backend configured')
388 type(c_ptr) :: a_d, b_d
389 real(kind=
rp),
intent(in) :: c
391 type(c_ptr),
optional :: strm
394 if (
present(strm))
then
407 call neko_error(
'No device backend configured')
414 real(kind=
rp),
intent(in) :: c
416 type(c_ptr),
optional :: strm
421 if (
present(strm))
then
434 call neko_error(
'No device backend configured')
442 real(kind=
rp),
intent(in) :: c
444 type(c_ptr),
optional :: strm
449 if (
present(strm))
then
462 call neko_error(
'No device backend configured')
469 real(kind=
rp),
intent(in) :: c
471 type(c_ptr),
optional ::strm
476 if (
present(strm))
then
489 call neko_error(
'No device backend configured')
495 type(c_ptr) :: a_d, b_d
497 type(c_ptr),
optional :: strm
502 if (
present(strm))
then
515 call neko_error(
'No device backend configured')
520 type(c_ptr) :: a_d, b_d, c_d, d_d
522 type(c_ptr),
optional :: strm
527 if (
present(strm))
then
534 call hip_add4(a_d, b_d, c_d, d_d, n, strm_)
536 call cuda_add4(a_d, b_d, c_d, d_d, n, strm_)
540 call neko_error(
'No device backend configured')
545 type(c_ptr) :: a_d, b_d
548 type(c_ptr),
optional :: strm
553 if (
present(strm))
then
566 call neko_error(
'No device backend configured')
573 type(c_ptr) :: a_d, b_d
576 type(c_ptr),
optional :: strm
581 if (
present(strm))
then
594 call neko_error(
'No device backend configured')
600 type(c_ptr) :: a_d, b_d
603 type(c_ptr),
optional :: strm
608 if (
present(strm))
then
621 call neko_error(
'No device backend configured')
627 type(c_ptr) :: a_d, b_d, c_d
629 type(c_ptr),
optional :: strm
634 if (
present(strm))
then
641 call hip_add3(a_d, b_d, c_d, n, strm_)
647 call neko_error(
'No device backend configured')
653 type(c_ptr) :: a_d, b_d, c_d
654 real(kind=
rp) :: c1, c2
656 type(c_ptr),
optional :: strm
661 if (
present(strm))
then
668 call hip_add3s2(a_d, b_d, c_d, c1, c2, n, strm_)
674 call neko_error(
'No device backend configured')
680 type(c_ptr) :: a_d, b_d, c_d, d_d
681 real(kind=
rp) :: c1, c2, c3
683 type(c_ptr),
optional :: strm
688 if (
present(strm))
then
695 call hip_add4s3(a_d, b_d, c_d, d_d, c1, c2, c3, n, strm_)
697 call cuda_add4s3(a_d, b_d, c_d, d_d, c1, c2, c3, n, strm_)
701 call neko_error(
'No device backend configured')
706 subroutine device_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2 , c3, c4, n, strm)
707 type(c_ptr) :: a_d, b_d, c_d, d_d, e_d
708 real(kind=
rp) :: c1, c2, c3, c4
710 type(c_ptr),
optional :: strm
715 if (
present(strm))
then
722 call hip_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm_)
724 call cuda_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm_)
726 call opencl_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm_)
728 call neko_error(
'No device backend configured')
736 type(c_ptr),
optional :: strm
741 if (
present(strm))
then
754 call neko_error(
'No device backend configured')
760 type(c_ptr) :: a_d, b_d
762 type(c_ptr),
optional :: strm
767 if (
present(strm))
then
780 call neko_error(
'No device backend configured')
786 type(c_ptr) :: a_d, b_d, c_d
788 type(c_ptr),
optional :: strm
791 if (
present(strm))
then
803 call neko_error(
'opencl_invcol3 not implemented')
805 call neko_error(
'No device backend configured')
811 type(c_ptr) :: a_d, b_d
813 type(c_ptr),
optional :: strm
816 if (
present(strm))
then
830 call neko_error(
'No device backend configured')
836 type(c_ptr) :: a_d, b_d, c_d
838 type(c_ptr),
optional :: strm
843 if (
present(strm))
then
850 call hip_col3(a_d, b_d, c_d, n, strm_)
856 call neko_error(
'No device backend configured')
862 type(c_ptr) :: a_d, b_d, c_d
864 type(c_ptr),
optional :: strm
869 if (
present(strm))
then
882 call neko_error(
'No device backend configured')
888 type(c_ptr) :: a_d, b_d
890 type(c_ptr),
optional :: strm
895 if (
present(strm))
then
908 call neko_error(
'No device backend configured')
914 type(c_ptr) :: a_d, b_d, c_d
916 type(c_ptr),
optional :: strm
921 if (
present(strm))
then
928 call hip_sub3(a_d, b_d, c_d, n, strm_)
934 call neko_error(
'No device backend configured')
940 type(c_ptr) :: a_d, b_d, c_d
942 type(c_ptr),
optional :: strm
947 if (
present(strm))
then
960 call neko_error(
'No device backend configured')
966 type(c_ptr) :: a_d, b_d, c_d, d_d
968 type(c_ptr),
optional :: strm
973 if (
present(strm))
then
986 call neko_error(
'No device backend configured')
992 type(c_ptr) :: a_d, b_d, c_d
995 type(c_ptr),
optional :: strm
1000 if (
present(strm))
then
1013 call neko_error(
'No device backend configured')
1019 subroutine device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm)
1020 type(c_ptr) :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
1022 type(c_ptr),
optional :: strm
1023 type(c_ptr) :: strm_
1025 if (n .lt. 1)
return
1027 if (
present(strm))
then
1034 call hip_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm_)
1036 call cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm_)
1038 call opencl_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm_)
1040 call neko_error(
'No device backend configured')
1047 w1_d, w2_d, w3_d, n, strm)
1048 type(c_ptr) :: u1_d, u2_d, u3_d
1049 type(c_ptr) :: v1_d, v2_d, v3_d
1050 type(c_ptr) :: w1_d, w2_d, w3_d
1052 type(c_ptr),
optional :: strm
1053 type(c_ptr) :: strm_
1055 if (n .lt. 1)
return
1057 if (
present(strm))
then
1064 call hip_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
1065 w1_d, w2_d, w3_d, n, strm_)
1067 call cuda_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
1068 w1_d, w2_d, w3_d, n, strm_)
1071 w1_d, w2_d, w3_d, n, strm_)
1073 call neko_error(
'No device backend configured')
1080 type(c_ptr) :: u_d, v_d, w_d
1082 type(c_ptr),
optional :: strm
1083 type(c_ptr) :: strm_
1084 real(kind=
rp) :: res
1088 if (n .lt. 1)
return
1090 if (
present(strm))
then
1097 res =
hip_vlsc3(u_d, v_d, w_d, n, strm_)
1104 call neko_error(
'No device backend configured')
1110 type(c_ptr) :: a_d, b_d, c_d
1112 type(c_ptr),
optional :: strm
1113 type(c_ptr) :: strm_
1114 real(kind=
rp) :: res
1116 if (
present(strm))
then
1124 res =
hip_glsc3(a_d, b_d, c_d, n, strm_)
1130 call neko_error(
'No device backend configured')
1133#ifndef HAVE_DEVICE_MPI
1135 call mpi_allreduce(mpi_in_place, res, 1, &
1142 type(c_ptr),
value :: w_d, v_d_d, mult_d
1143 integer(c_int) :: j, n
1145 type(c_ptr),
optional :: strm
1146 type(c_ptr) :: strm_
1149 if (
present(strm))
then
1162 call neko_error(
'No device backend configured')
1165#ifndef HAVE_DEVICE_MPI
1167 call mpi_allreduce(mpi_in_place, h, j, &
1174 type(c_ptr),
value :: y_d, x_d_d, a_d
1175 integer(c_int) :: j, n
1176 type(c_ptr),
optional :: strm
1177 type(c_ptr) :: strm_
1179 if (n .lt. 1)
return
1181 if (
present(strm))
then
1194 call neko_error(
'No device backend configured')
1200 type(c_ptr) :: a_d, b_d
1202 real(kind=
rp) :: res
1203 type(c_ptr),
optional :: strm
1204 type(c_ptr) :: strm_
1206 if (
present(strm))
then
1220 call neko_error(
'No device backend configured')
1223#ifndef HAVE_DEVICE_MPI
1225 call mpi_allreduce(mpi_in_place, res, 1, &
1234 type(c_ptr),
intent(in) :: a_d, b_d
1235 integer,
intent(in) :: n
1237 real(kind=
rp) :: res
1238 type(c_ptr),
optional :: strm
1239 type(c_ptr) :: strm_
1241 if (
present(strm))
then
1255 call neko_error(
'No device backend configured')
1258#ifndef HAVE_DEVICE_MPI
1260 call mpi_allreduce(mpi_in_place, res, 1, &
1272 real(kind=
rp) :: res
1273 type(c_ptr),
optional :: strm
1274 type(c_ptr) :: strm_
1276 if (
present(strm))
then
1290 call neko_error(
'No device backend configured')
1293#ifndef HAVE_DEVICE_MPI
1295 call mpi_allreduce(mpi_in_place, res, 1, &
1302 integer,
intent(in) :: n
1304 type(c_ptr),
optional :: strm
1305 type(c_ptr) :: strm_
1307 if (n .lt. 1)
return
1309 if (
present(strm))
then
1322 call neko_error(
'No device backend configured')
1333 type(c_ptr) :: a_d, b_d
1335 type(c_ptr),
optional :: strm
1336 type(c_ptr) :: strm_
1338 if (n .lt. 1)
return
1340 if (
present(strm))
then
1353 call neko_error(
'No device backend configured')
1360 type(c_ptr) :: a_d, b_d, c_d
1362 type(c_ptr),
optional :: strm
1363 type(c_ptr) :: strm_
1365 if (n .lt. 1)
return
1367 if (
present(strm))
then
1380 call neko_error(
'No device backend configured')
1389 real(kind=
rp),
intent(in) :: c
1391 type(c_ptr),
optional :: strm
1392 type(c_ptr) :: strm_
1394 if (n .lt. 1)
return
1396 if (
present(strm))
then
1409 call neko_error(
'No device backend configured')
1417 type(c_ptr) :: a_d, b_d
1418 real(kind=
rp),
intent(in) :: c
1420 type(c_ptr),
optional :: strm
1421 type(c_ptr) :: strm_
1423 if (n .lt. 1)
return
1425 if (
present(strm))
then
1438 call neko_error(
'No device backend configured')
1449 type(c_ptr) :: a_d, b_d
1451 type(c_ptr),
optional :: strm
1452 type(c_ptr) :: strm_
1454 if (n .lt. 1)
return
1456 if (
present(strm))
then
1469 call neko_error(
'No device backend configured')
1476 type(c_ptr) :: a_d, b_d, c_d
1478 type(c_ptr),
optional :: strm
1479 type(c_ptr) :: strm_
1481 if (n .lt. 1)
return
1483 if (
present(strm))
then
1496 call neko_error(
'No device backend configured')
1505 real(kind=
rp),
intent(in) :: c
1507 type(c_ptr),
optional :: strm
1508 type(c_ptr) :: strm_
1510 if (n .lt. 1)
return
1512 if (
present(strm))
then
1525 call neko_error(
'No device backend configured')
1533 type(c_ptr) :: a_d, b_d
1534 real(kind=
rp),
intent(in) :: c
1536 type(c_ptr),
optional :: strm
1537 type(c_ptr) :: strm_
1539 if (n .lt. 1)
return
1541 if (
present(strm))
then
1554 call neko_error(
'No device backend configured')
1564 type(c_ptr),
intent(inout) :: a_d
1565 integer,
intent(in) :: c
1566 integer,
intent(in) :: n
1567 type(c_ptr),
optional :: strm
1568 type(c_ptr) :: strm_
1569 if (n .lt. 1)
return
1571 if (
present(strm))
then
1584 call neko_error(
'No device backend configured')
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
integer, public pe_size
MPI size of communicator.
type(mpi_comm), public neko_comm
MPI communicator.
subroutine, public device_pwmin2(a_d, b_d, n, strm)
Compute the point-wise minimum of two vectors .
subroutine, public device_add2s1(a_d, b_d, c1, n, strm)
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n, strm)
subroutine, public device_add4s3(a_d, b_d, c_d, d_d, c1, c2, c3, n, strm)
Returns .
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction .
subroutine, public device_masked_scatter_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
Scatter a masked vector .
subroutine, public device_add2s2(a_d, b_d, c1, n, strm)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_addcol3(a_d, b_d, c_d, n, strm)
Returns .
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_pwmax3(a_d, b_d, c_d, n, strm)
Compute the point-wise maximum of two vectors .
subroutine, public device_invcol1(a_d, n, strm)
Invert a vector .
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n, strm)
Returns .
subroutine, public device_masked_atomic_reduction_0(a_d, b_d, mask_d, n, n_mask, strm)
subroutine, public device_cpwmax3(a_d, b_d, c, n, strm)
Compute the point-wise maximum of a vector and a scalar .
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_rone(a_d, n, strm)
Set all elements to one.
subroutine, public device_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, w1_d, w2_d, w3_d, n, strm)
Compute a cross product (3-d version) assuming vector components etc.
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm)
Compute a dot product (3-d version) assuming vector components etc.
real(kind=rp) function, public device_glsubnorm(a_d, b_d, n, strm)
Returns the norm of the difference of two vectors .
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n, strm)
subroutine, public device_cfill_mask(a_d, c, n, mask_d, n_mask, strm)
Fill a constant to a masked vector. .
subroutine, public device_cadd2(a_d, b_d, c, n, strm)
Add a scalar to vector .
subroutine, public device_sub2(a_d, b_d, n, strm)
Vector substraction .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
subroutine, public device_invcol3(a_d, b_d, c_d, n, strm)
Vector division .
subroutine, public device_pwmin3(a_d, b_d, c_d, n, strm)
Compute the point-wise minimum of two vectors .
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_masked_gather_copy_aligned(a_d, b_d, mask_d, n, n_mask, strm)
Gather a masked vector .
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n, strm)
Compute multiplication sum .
subroutine, public device_cdiv2(a_d, b_d, c, n, strm)
Division of constant c by array .
subroutine, public device_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm)
Returns .
subroutine, public device_add4(a_d, b_d, c_d, d_d, n, strm)
subroutine, public device_cpwmax2(a_d, c, n, strm)
Compute the point-wise maximum of a vector and a scalar .
subroutine, public device_masked_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
Copy a masked vector .
subroutine, public device_subcol3(a_d, b_d, c_d, n, strm)
Returns .
subroutine, public device_cdiv(a_d, c, n, strm)
Division of constant c by array .
subroutine, public device_absval(a_d, n, strm)
subroutine device_iadd(a_d, c, n, strm)
Add an integer scalar to vector .
subroutine, public device_masked_gather_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
Gather a masked vector .
subroutine, public device_invcol2(a_d, b_d, n, strm)
Vector division .
subroutine, public device_addsqr2s2(a_d, b_d, c1, n, strm)
Returns .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
subroutine, public device_cpwmin3(a_d, b_d, c, n, strm)
Compute the point-wise minimum of a vector and a scalar .
real(kind=rp) function, public device_glsc2(a_d, b_d, n, strm)
Weighted inner product .
subroutine, public device_cmult2(a_d, b_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_col3(a_d, b_d, c_d, n, strm)
Vector multiplication with 3 vectors .
subroutine, public device_addcol4(a_d, b_d, c_d, d_d, n, strm)
Returns .
subroutine, public device_cfill(a_d, c, n, strm)
Set all elements to a constant c .
subroutine, public device_cpwmin2(a_d, c, n, strm)
Compute the point-wise minimum of a vector and a scalar .
subroutine, public device_add3(a_d, b_d, c_d, n, strm)
Vector addition .
subroutine, public device_addcol3s2(a_d, b_d, c_d, s, n, strm)
Returns .
subroutine, public device_pwmax2(a_d, b_d, n, strm)
Compute the point-wise maximum of two vectors .
subroutine device_radd(a_d, c, n, strm)
Add a scalar to vector .
Device abstraction, common interface for various accelerators.
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
integer, parameter, public c_rp
integer, parameter, public rp
Global precision used in computations.