34 use,
intrinsic :: iso_c_binding, only: c_ptr, c_int
38 use mpi_f08,
only : mpi_sum, mpi_min, mpi_max, mpi_in_place, mpi_allreduce
76 type(c_ptr) :: a_d, b_d
78 type(c_ptr),
optional :: strm
83 if (
present(strm))
then
96 call neko_error(
'no device backend configured')
102 type(c_ptr) :: a_d, b_d, mask_d
104 type(c_ptr),
optional :: strm
107 if (n .lt. 1 .or. n_mask .lt. 1)
return
109 if (
present(strm))
then
122 call neko_error(
'no device backend configured')
128 type(c_ptr) :: a_d, b_d, mask_d
130 type(c_ptr),
optional :: strm
133 if (n .lt. 1 .or. n_mask .lt. 1)
return
135 if (
present(strm))
then
148 call neko_error(
'no device backend configured')
154 n2, lx, ly, lz, n_mask, strm)
155 type(c_ptr) :: a_d, b_d, mask_d, facet_d
156 integer :: n1, n2, lx, ly, lz, n_mask
157 type(c_ptr),
optional :: strm
160 if (n_mask .lt. 1)
return
162 if (
present(strm))
then
170 ly, lz, n_mask, strm_)
173 ly, lz, n_mask, strm_)
176 lx, ly, lz, n_mask, strm_)
178 call neko_error(
'no device backend configured')
185 type(c_ptr) :: a_d, b_d, mask_d
187 type(c_ptr),
optional :: strm
190 if (n .lt. 1 .or. n_mask .lt. 1)
return
192 if (
present(strm))
then
205 call neko_error(
'no device backend configured')
211 type(c_ptr) :: a_d, b_d, mask_d
213 type(c_ptr),
optional :: strm
216 if (n .lt. 1 .or. n_mask .lt. 1)
return
218 if (
present(strm))
then
231 call neko_error(
'no device backend configured')
238 type(c_ptr) :: a_d, b_d, mask_d
240 type(c_ptr),
optional :: strm
243 if (n .lt. 1 .or. n_mask .lt. 1)
return
245 if (
present(strm))
then
258 call neko_error(
'no device backend configured')
263 type(c_ptr) :: a_d, b_d, mask_d
265 type(c_ptr),
optional :: strm
268 if (n .lt. 1 .or. n_mask .lt. 1)
return
270 if (
present(strm))
then
281 call neko_error(
'No OpenCL bcknd, masked atomic reduction')
283 call neko_error(
'no device backend configured')
291 real(kind=
rp),
intent(in) :: c
293 type(c_ptr) :: mask_d
295 type(c_ptr),
optional :: strm
298 if (n .lt. 1 .or. n_mask .lt. 1)
return
300 if (
present(strm))
then
313 call neko_error(
'No device backend configured')
321 type(c_ptr),
optional :: strm
326 if (
present(strm))
then
339 call neko_error(
'No device backend configured')
347 type(c_ptr),
optional :: strm
349 real(kind=
rp),
parameter :: one = 1.0_rp
353 if (
present(strm))
then
359#if HAVE_HIP || HAVE_CUDA || HAVE_OPENCL
362 call neko_error(
'No device backend configured')
369 real(kind=
rp),
intent(in) :: c
371 type(c_ptr),
optional :: strm
376 if (
present(strm))
then
389 call neko_error(
'No device backend configured')
395 type(c_ptr) :: a_d, b_d
396 real(kind=
rp),
intent(in) :: c
398 type(c_ptr),
optional :: strm
403 if (
present(strm))
then
416 call neko_error(
'No device backend configured')
423 real(kind=
rp),
intent(in) :: c
425 type(c_ptr),
optional :: strm
428 if (
present(strm))
then
441 call neko_error(
'No device backend configured')
447 type(c_ptr) :: a_d, b_d
448 real(kind=
rp),
intent(in) :: c
450 type(c_ptr),
optional :: strm
453 if (
present(strm))
then
466 call neko_error(
'No device backend configured')
473 real(kind=
rp),
intent(in) :: c
475 type(c_ptr),
optional :: strm
480 if (
present(strm))
then
493 call neko_error(
'No device backend configured')
501 real(kind=
rp),
intent(in) :: c
503 type(c_ptr),
optional :: strm
508 if (
present(strm))
then
521 call neko_error(
'No device backend configured')
528 real(kind=
rp),
intent(in) :: min_val, max_val
530 type(c_ptr),
optional :: strm
533 if (n .lt. 1 .or. max_val .le. min_val)
return
535 if (
present(strm))
then
542 call hip_cwrap(a_d, min_val, max_val, n, strm_)
544 call cuda_cwrap(a_d, min_val, max_val, n, strm_)
548 call neko_error(
'No device backend configured')
555 real(kind=
rp),
intent(in) :: c
557 type(c_ptr),
optional ::strm
562 if (
present(strm))
then
575 call neko_error(
'No device backend configured')
581 type(c_ptr) :: a_d, b_d
583 type(c_ptr),
optional :: strm
588 if (
present(strm))
then
601 call neko_error(
'No device backend configured')
606 type(c_ptr) :: a_d, b_d, c_d, d_d
608 type(c_ptr),
optional :: strm
613 if (
present(strm))
then
620 call hip_add4(a_d, b_d, c_d, d_d, n, strm_)
622 call cuda_add4(a_d, b_d, c_d, d_d, n, strm_)
626 call neko_error(
'No device backend configured')
631 type(c_ptr) :: a_d, b_d
634 type(c_ptr),
optional :: strm
639 if (
present(strm))
then
652 call neko_error(
'No device backend configured')
659 type(c_ptr) :: a_d, b_d
662 type(c_ptr),
optional :: strm
667 if (
present(strm))
then
680 call neko_error(
'No device backend configured')
686 type(c_ptr) :: a_d, b_d
689 type(c_ptr),
optional :: strm
694 if (
present(strm))
then
707 call neko_error(
'No device backend configured')
713 type(c_ptr) :: a_d, b_d, c_d
715 type(c_ptr),
optional :: strm
720 if (
present(strm))
then
727 call hip_add3(a_d, b_d, c_d, n, strm_)
733 call neko_error(
'No device backend configured')
739 type(c_ptr) :: a_d, b_d, c_d
740 real(kind=
rp) :: c1, c2
742 type(c_ptr),
optional :: strm
747 if (
present(strm))
then
754 call hip_add3s2(a_d, b_d, c_d, c1, c2, n, strm_)
760 call neko_error(
'No device backend configured')
766 type(c_ptr) :: a_d, b_d, c_d, d_d
767 real(kind=
rp) :: c1, c2, c3
769 type(c_ptr),
optional :: strm
774 if (
present(strm))
then
781 call hip_add4s3(a_d, b_d, c_d, d_d, c1, c2, c3, n, strm_)
783 call cuda_add4s3(a_d, b_d, c_d, d_d, c1, c2, c3, n, strm_)
787 call neko_error(
'No device backend configured')
792 subroutine device_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2 , c3, c4, n, strm)
793 type(c_ptr) :: a_d, b_d, c_d, d_d, e_d
794 real(kind=
rp) :: c1, c2, c3, c4
796 type(c_ptr),
optional :: strm
801 if (
present(strm))
then
808 call hip_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm_)
810 call cuda_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm_)
812 call opencl_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm_)
814 call neko_error(
'No device backend configured')
822 type(c_ptr),
optional :: strm
827 if (
present(strm))
then
840 call neko_error(
'No device backend configured')
846 type(c_ptr) :: a_d, b_d
848 type(c_ptr),
optional :: strm
853 if (
present(strm))
then
866 call neko_error(
'No device backend configured')
872 type(c_ptr) :: a_d, b_d, c_d
874 type(c_ptr),
optional :: strm
877 if (
present(strm))
then
889 call neko_error(
'opencl_invcol3 not implemented')
891 call neko_error(
'No device backend configured')
897 type(c_ptr) :: a_d, b_d
899 type(c_ptr),
optional :: strm
902 if (
present(strm))
then
916 call neko_error(
'No device backend configured')
922 type(c_ptr) :: a_d, b_d, c_d
924 type(c_ptr),
optional :: strm
929 if (
present(strm))
then
936 call hip_col3(a_d, b_d, c_d, n, strm_)
942 call neko_error(
'No device backend configured')
948 type(c_ptr) :: a_d, b_d, c_d
950 type(c_ptr),
optional :: strm
955 if (
present(strm))
then
968 call neko_error(
'No device backend configured')
974 type(c_ptr) :: a_d, b_d
976 type(c_ptr),
optional :: strm
981 if (
present(strm))
then
994 call neko_error(
'No device backend configured')
1000 type(c_ptr) :: a_d, b_d, c_d
1002 type(c_ptr),
optional :: strm
1003 type(c_ptr) :: strm_
1005 if (n .lt. 1)
return
1007 if (
present(strm))
then
1014 call hip_sub3(a_d, b_d, c_d, n, strm_)
1020 call neko_error(
'No device backend configured')
1026 type(c_ptr) :: a_d, b_d, c_d
1028 type(c_ptr),
optional :: strm
1029 type(c_ptr) :: strm_
1031 if (n .lt. 1)
return
1033 if (
present(strm))
then
1046 call neko_error(
'No device backend configured')
1052 type(c_ptr) :: a_d, b_d, c_d, d_d
1054 type(c_ptr),
optional :: strm
1055 type(c_ptr) :: strm_
1057 if (n .lt. 1)
return
1059 if (
present(strm))
then
1072 call neko_error(
'No device backend configured')
1078 type(c_ptr) :: a_d, b_d, c_d
1081 type(c_ptr),
optional :: strm
1082 type(c_ptr) :: strm_
1084 if (n .lt. 1)
return
1086 if (
present(strm))
then
1099 call neko_error(
'No device backend configured')
1105 subroutine device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm)
1106 type(c_ptr) :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
1108 type(c_ptr),
optional :: strm
1109 type(c_ptr) :: strm_
1111 if (n .lt. 1)
return
1113 if (
present(strm))
then
1120 call hip_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm_)
1122 call cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm_)
1124 call opencl_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm_)
1126 call neko_error(
'No device backend configured')
1133 w1_d, w2_d, w3_d, n, strm)
1134 type(c_ptr) :: u1_d, u2_d, u3_d
1135 type(c_ptr) :: v1_d, v2_d, v3_d
1136 type(c_ptr) :: w1_d, w2_d, w3_d
1138 type(c_ptr),
optional :: strm
1139 type(c_ptr) :: strm_
1141 if (n .lt. 1)
return
1143 if (
present(strm))
then
1150 call hip_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
1151 w1_d, w2_d, w3_d, n, strm_)
1153 call cuda_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
1154 w1_d, w2_d, w3_d, n, strm_)
1157 w1_d, w2_d, w3_d, n, strm_)
1159 call neko_error(
'No device backend configured')
1166 type(c_ptr) :: u_d, v_d, w_d
1168 type(c_ptr),
optional :: strm
1169 type(c_ptr) :: strm_
1170 real(kind=
rp) :: res
1174 if (n .lt. 1)
return
1176 if (
present(strm))
then
1183 res =
hip_vlsc3(u_d, v_d, w_d, n, strm_)
1190 call neko_error(
'No device backend configured')
1196 type(c_ptr) :: a_d, b_d, c_d
1198 type(c_ptr),
optional :: strm
1199 type(c_ptr) :: strm_
1200 real(kind=
rp) :: res
1202 if (
present(strm))
then
1210 res =
hip_glsc3(a_d, b_d, c_d, n, strm_)
1216 call neko_error(
'No device backend configured')
1219#ifndef HAVE_DEVICE_MPI
1221 call mpi_allreduce(mpi_in_place, res, 1, &
1228 type(c_ptr),
value :: w_d, v_d_d, mult_d
1229 integer(c_int) :: j, n
1231 type(c_ptr),
optional :: strm
1232 type(c_ptr) :: strm_
1235 if (
present(strm))
then
1248 call neko_error(
'No device backend configured')
1251#ifndef HAVE_DEVICE_MPI
1253 call mpi_allreduce(mpi_in_place, h, j, &
1260 type(c_ptr),
value :: y_d, x_d_d, a_d
1261 integer(c_int) :: j, n
1262 type(c_ptr),
optional :: strm
1263 type(c_ptr) :: strm_
1265 if (n .lt. 1)
return
1267 if (
present(strm))
then
1280 call neko_error(
'No device backend configured')
1286 type(c_ptr) :: a_d, b_d
1288 real(kind=
rp) :: res
1289 type(c_ptr),
optional :: strm
1290 type(c_ptr) :: strm_
1292 if (
present(strm))
then
1306 call neko_error(
'No device backend configured')
1309#ifndef HAVE_DEVICE_MPI
1311 call mpi_allreduce(mpi_in_place, res, 1, &
1320 type(c_ptr),
intent(in) :: a_d, b_d
1321 integer,
intent(in) :: n
1323 real(kind=
rp) :: res
1324 type(c_ptr),
optional :: strm
1325 type(c_ptr) :: strm_
1327 if (
present(strm))
then
1341 call neko_error(
'No device backend configured')
1344#ifndef HAVE_DEVICE_MPI
1346 call mpi_allreduce(mpi_in_place, res, 1, &
1358 real(kind=
rp) :: res
1359 type(c_ptr),
optional :: strm
1360 type(c_ptr) :: strm_
1362 if (
present(strm))
then
1376 call neko_error(
'No device backend configured')
1379#ifndef HAVE_DEVICE_MPI
1381 call mpi_allreduce(mpi_in_place, res, 1, &
1391 real(kind=
rp) :: res, ninf
1392 type(c_ptr),
optional :: strm
1393 type(c_ptr) :: strm_
1400 if (
present(strm))
then
1406 ninf = -huge(0.0_rp)
1414 call neko_error(
'No device backend configured')
1417#ifndef HAVE_DEVICE_MPI
1419 call mpi_allreduce(mpi_in_place, res, 1, &
1429 real(kind=
rp) :: res, pinf
1430 type(c_ptr),
optional :: strm
1431 type(c_ptr) :: strm_
1438 if (
present(strm))
then
1452 call neko_error(
'No device backend configured')
1455#ifndef HAVE_DEVICE_MPI
1457 call mpi_allreduce(mpi_in_place, res, 1, &
1464 integer,
intent(in) :: n
1466 type(c_ptr),
optional :: strm
1467 type(c_ptr) :: strm_
1469 if (n .lt. 1)
return
1471 if (
present(strm))
then
1484 call neko_error(
'No device backend configured')
1495 type(c_ptr) :: a_d, b_d
1497 type(c_ptr),
optional :: strm
1498 type(c_ptr) :: strm_
1500 if (n .lt. 1)
return
1502 if (
present(strm))
then
1515 call neko_error(
'No device backend configured')
1522 type(c_ptr) :: a_d, b_d, c_d
1524 type(c_ptr),
optional :: strm
1525 type(c_ptr) :: strm_
1527 if (n .lt. 1)
return
1529 if (
present(strm))
then
1542 call neko_error(
'No device backend configured')
1551 real(kind=
rp),
intent(in) :: c
1553 type(c_ptr),
optional :: strm
1554 type(c_ptr) :: strm_
1556 if (n .lt. 1)
return
1558 if (
present(strm))
then
1571 call neko_error(
'No device backend configured')
1579 type(c_ptr) :: a_d, b_d
1580 real(kind=
rp),
intent(in) :: c
1582 type(c_ptr),
optional :: strm
1583 type(c_ptr) :: strm_
1585 if (n .lt. 1)
return
1587 if (
present(strm))
then
1600 call neko_error(
'No device backend configured')
1611 type(c_ptr) :: a_d, b_d
1613 type(c_ptr),
optional :: strm
1614 type(c_ptr) :: strm_
1616 if (n .lt. 1)
return
1618 if (
present(strm))
then
1631 call neko_error(
'No device backend configured')
1638 type(c_ptr) :: a_d, b_d, c_d
1640 type(c_ptr),
optional :: strm
1641 type(c_ptr) :: strm_
1643 if (n .lt. 1)
return
1645 if (
present(strm))
then
1658 call neko_error(
'No device backend configured')
1667 real(kind=
rp),
intent(in) :: c
1669 type(c_ptr),
optional :: strm
1670 type(c_ptr) :: strm_
1672 if (n .lt. 1)
return
1674 if (
present(strm))
then
1687 call neko_error(
'No device backend configured')
1695 type(c_ptr) :: a_d, b_d
1696 real(kind=
rp),
intent(in) :: c
1698 type(c_ptr),
optional :: strm
1699 type(c_ptr) :: strm_
1701 if (n .lt. 1)
return
1703 if (
present(strm))
then
1716 call neko_error(
'No device backend configured')
1726 type(c_ptr),
intent(inout) :: a_d
1727 integer,
intent(in) :: c
1728 integer,
intent(in) :: n
1729 type(c_ptr),
optional :: strm
1730 type(c_ptr) :: strm_
1731 if (n .lt. 1)
return
1733 if (
present(strm))
then
1746 call neko_error(
'No device backend configured')
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
integer, public pe_size
MPI size of communicator.
type(mpi_comm), public neko_comm
MPI communicator.
subroutine, public device_pwmin2(a_d, b_d, n, strm)
Compute the point-wise minimum of two vectors .
subroutine, public device_masked_scatter_copy_aligned(a_d, b_d, mask_d, n, n_mask, strm)
Scatter a masked vector .
subroutine, public device_add2s1(a_d, b_d, c1, n, strm)
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n, strm)
subroutine, public device_add4s3(a_d, b_d, c_d, d_d, c1, c2, c3, n, strm)
Returns .
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction .
subroutine, public device_masked_scatter_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
Scatter a masked vector .
subroutine, public device_add2s2(a_d, b_d, c1, n, strm)
Vector addition with scalar multiplication (multiplication on first argument)
real(kind=rp) function, public device_glmax(a_d, n, strm)
Max of a vector of length n.
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_addcol3(a_d, b_d, c_d, n, strm)
Returns .
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_pwmax3(a_d, b_d, c_d, n, strm)
Compute the point-wise maximum of two vectors .
subroutine, public device_invcol1(a_d, n, strm)
Invert a vector .
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n, strm)
Returns .
subroutine, public device_masked_atomic_reduction_0(a_d, b_d, mask_d, n, n_mask, strm)
subroutine, public device_cpwmax3(a_d, b_d, c, n, strm)
Compute the point-wise maximum of a vector and a scalar .
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_rone(a_d, n, strm)
Set all elements to one.
subroutine, public device_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, w1_d, w2_d, w3_d, n, strm)
Compute a cross product (3-d version) assuming vector components etc.
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm)
Compute a dot product (3-d version) assuming vector components etc.
real(kind=rp) function, public device_glsubnorm(a_d, b_d, n, strm)
Returns the norm of the difference of two vectors .
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n, strm)
subroutine, public device_cfill_mask(a_d, c, n, mask_d, n_mask, strm)
Fill a constant to a masked vector. .
subroutine, public device_cadd2(a_d, b_d, c, n, strm)
Add a scalar to vector .
subroutine, public device_sub2(a_d, b_d, n, strm)
Vector substraction .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
subroutine, public device_invcol3(a_d, b_d, c_d, n, strm)
Vector division .
subroutine, public device_pwmin3(a_d, b_d, c_d, n, strm)
Compute the point-wise minimum of two vectors .
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_masked_gather_copy_aligned(a_d, b_d, mask_d, n, n_mask, strm)
Gather a masked vector .
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n, strm)
Compute multiplication sum .
subroutine, public device_cdiv2(a_d, b_d, c, n, strm)
Division of constant c by array .
subroutine, public device_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm)
Returns .
subroutine, public device_add4(a_d, b_d, c_d, d_d, n, strm)
subroutine, public device_cpwmax2(a_d, c, n, strm)
Compute the point-wise maximum of a vector and a scalar .
subroutine, public device_masked_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
Copy a masked vector .
subroutine, public device_subcol3(a_d, b_d, c_d, n, strm)
Returns .
subroutine, public device_cdiv(a_d, c, n, strm)
Division of constant c by array .
subroutine, public device_absval(a_d, n, strm)
subroutine device_iadd(a_d, c, n, strm)
Add an integer scalar to vector .
subroutine, public device_masked_gather_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
Gather a masked vector .
subroutine, public device_invcol2(a_d, b_d, n, strm)
Vector division .
subroutine, public device_addsqr2s2(a_d, b_d, c1, n, strm)
Returns .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
subroutine, public device_cpwmin3(a_d, b_d, c, n, strm)
Compute the point-wise minimum of a vector and a scalar .
subroutine, public device_cwrap(a_d, min_val, max_val, n, strm)
Wrap value around a range (min, max)
subroutine, public device_face_masked_gather_copy_0(a_d, b_d, mask_d, facet_d, n1, n2, lx, ly, lz, n_mask, strm)
Gather a face-local SEM field .
real(kind=rp) function, public device_glsc2(a_d, b_d, n, strm)
Weighted inner product .
subroutine, public device_cmult2(a_d, b_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_col3(a_d, b_d, c_d, n, strm)
Vector multiplication with 3 vectors .
subroutine, public device_addcol4(a_d, b_d, c_d, d_d, n, strm)
Returns .
subroutine, public device_cfill(a_d, c, n, strm)
Set all elements to a constant c .
subroutine, public device_cpwmin2(a_d, c, n, strm)
Compute the point-wise minimum of a vector and a scalar .
subroutine, public device_add3(a_d, b_d, c_d, n, strm)
Vector addition .
real(kind=rp) function, public device_glmin(a_d, n, strm)
Min of a vector of length n.
subroutine, public device_addcol3s2(a_d, b_d, c_d, s, n, strm)
Returns .
subroutine, public device_pwmax2(a_d, b_d, n, strm)
Compute the point-wise maximum of two vectors .
subroutine device_radd(a_d, c, n, strm)
Add a scalar to vector .
Device abstraction, common interface for various accelerators.
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
integer, parameter, public c_rp
integer, parameter, public rp
Global precision used in computations.