34 use,
intrinsic :: iso_c_binding, only: c_ptr, c_int
38 use mpi_f08,
only : mpi_sum, mpi_min, mpi_max, mpi_in_place, mpi_allreduce
75 type(c_ptr) :: a_d, b_d
77 type(c_ptr),
optional :: strm
82 if (
present(strm))
then
95 call neko_error(
'no device backend configured')
101 type(c_ptr) :: a_d, b_d, mask_d
103 type(c_ptr),
optional :: strm
106 if (n .lt. 1 .or. n_mask .lt. 1)
return
108 if (
present(strm))
then
121 call neko_error(
'no device backend configured')
127 type(c_ptr) :: a_d, b_d, mask_d
129 type(c_ptr),
optional :: strm
132 if (n .lt. 1 .or. n_mask .lt. 1)
return
134 if (
present(strm))
then
147 call neko_error(
'no device backend configured')
154 type(c_ptr) :: a_d, b_d, mask_d
156 type(c_ptr),
optional :: strm
159 if (n .lt. 1 .or. n_mask .lt. 1)
return
161 if (
present(strm))
then
174 call neko_error(
'no device backend configured')
180 type(c_ptr) :: a_d, b_d, mask_d
182 type(c_ptr),
optional :: strm
185 if (n .lt. 1 .or. n_mask .lt. 1)
return
187 if (
present(strm))
then
200 call neko_error(
'no device backend configured')
205 type(c_ptr) :: a_d, b_d, mask_d
207 type(c_ptr),
optional :: strm
210 if (n .lt. 1 .or. n_mask .lt. 1)
return
212 if (
present(strm))
then
223 call neko_error(
'No OpenCL bcknd, masked atomic reduction')
225 call neko_error(
'no device backend configured')
233 real(kind=
rp),
intent(in) :: c
235 type(c_ptr) :: mask_d
237 type(c_ptr),
optional :: strm
240 if (n .lt. 1 .or. n_mask .lt. 1)
return
242 if (
present(strm))
then
255 call neko_error(
'No device backend configured')
263 type(c_ptr),
optional :: strm
268 if (
present(strm))
then
281 call neko_error(
'No device backend configured')
289 type(c_ptr),
optional :: strm
291 real(kind=
rp),
parameter :: one = 1.0_rp
295 if (
present(strm))
then
301#if HAVE_HIP || HAVE_CUDA || HAVE_OPENCL
304 call neko_error(
'No device backend configured')
311 real(kind=
rp),
intent(in) :: c
313 type(c_ptr),
optional :: strm
318 if (
present(strm))
then
331 call neko_error(
'No device backend configured')
337 type(c_ptr) :: a_d, b_d
338 real(kind=
rp),
intent(in) :: c
340 type(c_ptr),
optional :: strm
345 if (
present(strm))
then
358 call neko_error(
'No device backend configured')
365 real(kind=
rp),
intent(in) :: c
367 type(c_ptr),
optional :: strm
370 if (
present(strm))
then
383 call neko_error(
'No device backend configured')
389 type(c_ptr) :: a_d, b_d
390 real(kind=
rp),
intent(in) :: c
392 type(c_ptr),
optional :: strm
395 if (
present(strm))
then
408 call neko_error(
'No device backend configured')
415 real(kind=
rp),
intent(in) :: c
417 type(c_ptr),
optional :: strm
422 if (
present(strm))
then
435 call neko_error(
'No device backend configured')
443 real(kind=
rp),
intent(in) :: c
445 type(c_ptr),
optional :: strm
450 if (
present(strm))
then
463 call neko_error(
'No device backend configured')
470 real(kind=
rp),
intent(in) :: c
472 type(c_ptr),
optional ::strm
477 if (
present(strm))
then
490 call neko_error(
'No device backend configured')
496 type(c_ptr) :: a_d, b_d
498 type(c_ptr),
optional :: strm
503 if (
present(strm))
then
516 call neko_error(
'No device backend configured')
521 type(c_ptr) :: a_d, b_d, c_d, d_d
523 type(c_ptr),
optional :: strm
528 if (
present(strm))
then
535 call hip_add4(a_d, b_d, c_d, d_d, n, strm_)
537 call cuda_add4(a_d, b_d, c_d, d_d, n, strm_)
541 call neko_error(
'No device backend configured')
546 type(c_ptr) :: a_d, b_d
549 type(c_ptr),
optional :: strm
554 if (
present(strm))
then
567 call neko_error(
'No device backend configured')
574 type(c_ptr) :: a_d, b_d
577 type(c_ptr),
optional :: strm
582 if (
present(strm))
then
595 call neko_error(
'No device backend configured')
601 type(c_ptr) :: a_d, b_d
604 type(c_ptr),
optional :: strm
609 if (
present(strm))
then
622 call neko_error(
'No device backend configured')
628 type(c_ptr) :: a_d, b_d, c_d
630 type(c_ptr),
optional :: strm
635 if (
present(strm))
then
642 call hip_add3(a_d, b_d, c_d, n, strm_)
648 call neko_error(
'No device backend configured')
654 type(c_ptr) :: a_d, b_d, c_d
655 real(kind=
rp) :: c1, c2
657 type(c_ptr),
optional :: strm
662 if (
present(strm))
then
669 call hip_add3s2(a_d, b_d, c_d, c1, c2, n, strm_)
675 call neko_error(
'No device backend configured')
681 type(c_ptr) :: a_d, b_d, c_d, d_d
682 real(kind=
rp) :: c1, c2, c3
684 type(c_ptr),
optional :: strm
689 if (
present(strm))
then
696 call hip_add4s3(a_d, b_d, c_d, d_d, c1, c2, c3, n, strm_)
698 call cuda_add4s3(a_d, b_d, c_d, d_d, c1, c2, c3, n, strm_)
702 call neko_error(
'No device backend configured')
707 subroutine device_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2 , c3, c4, n, strm)
708 type(c_ptr) :: a_d, b_d, c_d, d_d, e_d
709 real(kind=
rp) :: c1, c2, c3, c4
711 type(c_ptr),
optional :: strm
716 if (
present(strm))
then
723 call hip_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm_)
725 call cuda_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm_)
727 call opencl_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm_)
729 call neko_error(
'No device backend configured')
737 type(c_ptr),
optional :: strm
742 if (
present(strm))
then
755 call neko_error(
'No device backend configured')
761 type(c_ptr) :: a_d, b_d
763 type(c_ptr),
optional :: strm
768 if (
present(strm))
then
781 call neko_error(
'No device backend configured')
787 type(c_ptr) :: a_d, b_d, c_d
789 type(c_ptr),
optional :: strm
792 if (
present(strm))
then
804 call neko_error(
'opencl_invcol3 not implemented')
806 call neko_error(
'No device backend configured')
812 type(c_ptr) :: a_d, b_d
814 type(c_ptr),
optional :: strm
817 if (
present(strm))
then
831 call neko_error(
'No device backend configured')
837 type(c_ptr) :: a_d, b_d, c_d
839 type(c_ptr),
optional :: strm
844 if (
present(strm))
then
851 call hip_col3(a_d, b_d, c_d, n, strm_)
857 call neko_error(
'No device backend configured')
863 type(c_ptr) :: a_d, b_d, c_d
865 type(c_ptr),
optional :: strm
870 if (
present(strm))
then
883 call neko_error(
'No device backend configured')
889 type(c_ptr) :: a_d, b_d
891 type(c_ptr),
optional :: strm
896 if (
present(strm))
then
909 call neko_error(
'No device backend configured')
915 type(c_ptr) :: a_d, b_d, c_d
917 type(c_ptr),
optional :: strm
922 if (
present(strm))
then
929 call hip_sub3(a_d, b_d, c_d, n, strm_)
935 call neko_error(
'No device backend configured')
941 type(c_ptr) :: a_d, b_d, c_d
943 type(c_ptr),
optional :: strm
948 if (
present(strm))
then
961 call neko_error(
'No device backend configured')
967 type(c_ptr) :: a_d, b_d, c_d, d_d
969 type(c_ptr),
optional :: strm
974 if (
present(strm))
then
987 call neko_error(
'No device backend configured')
993 type(c_ptr) :: a_d, b_d, c_d
996 type(c_ptr),
optional :: strm
1001 if (
present(strm))
then
1014 call neko_error(
'No device backend configured')
1020 subroutine device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm)
1021 type(c_ptr) :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
1023 type(c_ptr),
optional :: strm
1024 type(c_ptr) :: strm_
1026 if (n .lt. 1)
return
1028 if (
present(strm))
then
1035 call hip_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm_)
1037 call cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm_)
1039 call opencl_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm_)
1041 call neko_error(
'No device backend configured')
1048 w1_d, w2_d, w3_d, n, strm)
1049 type(c_ptr) :: u1_d, u2_d, u3_d
1050 type(c_ptr) :: v1_d, v2_d, v3_d
1051 type(c_ptr) :: w1_d, w2_d, w3_d
1053 type(c_ptr),
optional :: strm
1054 type(c_ptr) :: strm_
1056 if (n .lt. 1)
return
1058 if (
present(strm))
then
1065 call hip_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
1066 w1_d, w2_d, w3_d, n, strm_)
1068 call cuda_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
1069 w1_d, w2_d, w3_d, n, strm_)
1072 w1_d, w2_d, w3_d, n, strm_)
1074 call neko_error(
'No device backend configured')
1081 type(c_ptr) :: u_d, v_d, w_d
1083 type(c_ptr),
optional :: strm
1084 type(c_ptr) :: strm_
1085 real(kind=
rp) :: res
1089 if (n .lt. 1)
return
1091 if (
present(strm))
then
1098 res =
hip_vlsc3(u_d, v_d, w_d, n, strm_)
1105 call neko_error(
'No device backend configured')
1111 type(c_ptr) :: a_d, b_d, c_d
1113 type(c_ptr),
optional :: strm
1114 type(c_ptr) :: strm_
1115 real(kind=
rp) :: res
1117 if (
present(strm))
then
1125 res =
hip_glsc3(a_d, b_d, c_d, n, strm_)
1131 call neko_error(
'No device backend configured')
1134#ifndef HAVE_DEVICE_MPI
1136 call mpi_allreduce(mpi_in_place, res, 1, &
1143 type(c_ptr),
value :: w_d, v_d_d, mult_d
1144 integer(c_int) :: j, n
1146 type(c_ptr),
optional :: strm
1147 type(c_ptr) :: strm_
1150 if (
present(strm))
then
1163 call neko_error(
'No device backend configured')
1166#ifndef HAVE_DEVICE_MPI
1168 call mpi_allreduce(mpi_in_place, h, j, &
1175 type(c_ptr),
value :: y_d, x_d_d, a_d
1176 integer(c_int) :: j, n
1177 type(c_ptr),
optional :: strm
1178 type(c_ptr) :: strm_
1180 if (n .lt. 1)
return
1182 if (
present(strm))
then
1195 call neko_error(
'No device backend configured')
1201 type(c_ptr) :: a_d, b_d
1203 real(kind=
rp) :: res
1204 type(c_ptr),
optional :: strm
1205 type(c_ptr) :: strm_
1207 if (
present(strm))
then
1221 call neko_error(
'No device backend configured')
1224#ifndef HAVE_DEVICE_MPI
1226 call mpi_allreduce(mpi_in_place, res, 1, &
1235 type(c_ptr),
intent(in) :: a_d, b_d
1236 integer,
intent(in) :: n
1238 real(kind=
rp) :: res
1239 type(c_ptr),
optional :: strm
1240 type(c_ptr) :: strm_
1242 if (
present(strm))
then
1256 call neko_error(
'No device backend configured')
1259#ifndef HAVE_DEVICE_MPI
1261 call mpi_allreduce(mpi_in_place, res, 1, &
1273 real(kind=
rp) :: res
1274 type(c_ptr),
optional :: strm
1275 type(c_ptr) :: strm_
1277 if (
present(strm))
then
1291 call neko_error(
'No device backend configured')
1294#ifndef HAVE_DEVICE_MPI
1296 call mpi_allreduce(mpi_in_place, res, 1, &
1306 real(kind=
rp) :: res, ninf
1307 type(c_ptr),
optional :: strm
1308 type(c_ptr) :: strm_
1315 if (
present(strm))
then
1321 ninf = -huge(0.0_rp)
1329 call neko_error(
'No device backend configured')
1332#ifndef HAVE_DEVICE_MPI
1334 call mpi_allreduce(mpi_in_place, res, 1, &
1344 real(kind=
rp) :: res, pinf
1345 type(c_ptr),
optional :: strm
1346 type(c_ptr) :: strm_
1353 if (
present(strm))
then
1367 call neko_error(
'No device backend configured')
1370#ifndef HAVE_DEVICE_MPI
1372 call mpi_allreduce(mpi_in_place, res, 1, &
1379 integer,
intent(in) :: n
1381 type(c_ptr),
optional :: strm
1382 type(c_ptr) :: strm_
1384 if (n .lt. 1)
return
1386 if (
present(strm))
then
1399 call neko_error(
'No device backend configured')
1410 type(c_ptr) :: a_d, b_d
1412 type(c_ptr),
optional :: strm
1413 type(c_ptr) :: strm_
1415 if (n .lt. 1)
return
1417 if (
present(strm))
then
1430 call neko_error(
'No device backend configured')
1437 type(c_ptr) :: a_d, b_d, c_d
1439 type(c_ptr),
optional :: strm
1440 type(c_ptr) :: strm_
1442 if (n .lt. 1)
return
1444 if (
present(strm))
then
1457 call neko_error(
'No device backend configured')
1466 real(kind=
rp),
intent(in) :: c
1468 type(c_ptr),
optional :: strm
1469 type(c_ptr) :: strm_
1471 if (n .lt. 1)
return
1473 if (
present(strm))
then
1486 call neko_error(
'No device backend configured')
1494 type(c_ptr) :: a_d, b_d
1495 real(kind=
rp),
intent(in) :: c
1497 type(c_ptr),
optional :: strm
1498 type(c_ptr) :: strm_
1500 if (n .lt. 1)
return
1502 if (
present(strm))
then
1515 call neko_error(
'No device backend configured')
1526 type(c_ptr) :: a_d, b_d
1528 type(c_ptr),
optional :: strm
1529 type(c_ptr) :: strm_
1531 if (n .lt. 1)
return
1533 if (
present(strm))
then
1546 call neko_error(
'No device backend configured')
1553 type(c_ptr) :: a_d, b_d, c_d
1555 type(c_ptr),
optional :: strm
1556 type(c_ptr) :: strm_
1558 if (n .lt. 1)
return
1560 if (
present(strm))
then
1573 call neko_error(
'No device backend configured')
1582 real(kind=
rp),
intent(in) :: c
1584 type(c_ptr),
optional :: strm
1585 type(c_ptr) :: strm_
1587 if (n .lt. 1)
return
1589 if (
present(strm))
then
1602 call neko_error(
'No device backend configured')
1610 type(c_ptr) :: a_d, b_d
1611 real(kind=
rp),
intent(in) :: c
1613 type(c_ptr),
optional :: strm
1614 type(c_ptr) :: strm_
1616 if (n .lt. 1)
return
1618 if (
present(strm))
then
1631 call neko_error(
'No device backend configured')
1641 type(c_ptr),
intent(inout) :: a_d
1642 integer,
intent(in) :: c
1643 integer,
intent(in) :: n
1644 type(c_ptr),
optional :: strm
1645 type(c_ptr) :: strm_
1646 if (n .lt. 1)
return
1648 if (
present(strm))
then
1661 call neko_error(
'No device backend configured')
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
integer, public pe_size
MPI size of communicator.
type(mpi_comm), public neko_comm
MPI communicator.
subroutine, public device_pwmin2(a_d, b_d, n, strm)
Compute the point-wise minimum of two vectors .
subroutine, public device_add2s1(a_d, b_d, c1, n, strm)
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n, strm)
subroutine, public device_add4s3(a_d, b_d, c_d, d_d, c1, c2, c3, n, strm)
Returns .
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction .
subroutine, public device_masked_scatter_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
Scatter a masked vector .
subroutine, public device_add2s2(a_d, b_d, c1, n, strm)
Vector addition with scalar multiplication (multiplication on first argument)
real(kind=rp) function, public device_glmax(a_d, n, strm)
Max of a vector of length n.
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_addcol3(a_d, b_d, c_d, n, strm)
Returns .
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_pwmax3(a_d, b_d, c_d, n, strm)
Compute the point-wise maximum of two vectors .
subroutine, public device_invcol1(a_d, n, strm)
Invert a vector .
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n, strm)
Returns .
subroutine, public device_masked_atomic_reduction_0(a_d, b_d, mask_d, n, n_mask, strm)
subroutine, public device_cpwmax3(a_d, b_d, c, n, strm)
Compute the point-wise maximum of a vector and a scalar .
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_rone(a_d, n, strm)
Set all elements to one.
subroutine, public device_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, w1_d, w2_d, w3_d, n, strm)
Compute a cross product (3-d version) assuming vector components etc.
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm)
Compute a dot product (3-d version) assuming vector components etc.
real(kind=rp) function, public device_glsubnorm(a_d, b_d, n, strm)
Returns the norm of the difference of two vectors .
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n, strm)
subroutine, public device_cfill_mask(a_d, c, n, mask_d, n_mask, strm)
Fill a constant to a masked vector. .
subroutine, public device_cadd2(a_d, b_d, c, n, strm)
Add a scalar to vector .
subroutine, public device_sub2(a_d, b_d, n, strm)
Vector substraction .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
subroutine, public device_invcol3(a_d, b_d, c_d, n, strm)
Vector division .
subroutine, public device_pwmin3(a_d, b_d, c_d, n, strm)
Compute the point-wise minimum of two vectors .
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_masked_gather_copy_aligned(a_d, b_d, mask_d, n, n_mask, strm)
Gather a masked vector .
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n, strm)
Compute multiplication sum .
subroutine, public device_cdiv2(a_d, b_d, c, n, strm)
Division of constant c by array .
subroutine, public device_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm)
Returns .
subroutine, public device_add4(a_d, b_d, c_d, d_d, n, strm)
subroutine, public device_cpwmax2(a_d, c, n, strm)
Compute the point-wise maximum of a vector and a scalar .
subroutine, public device_masked_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
Copy a masked vector .
subroutine, public device_subcol3(a_d, b_d, c_d, n, strm)
Returns .
subroutine, public device_cdiv(a_d, c, n, strm)
Division of constant c by array .
subroutine, public device_absval(a_d, n, strm)
subroutine device_iadd(a_d, c, n, strm)
Add an integer scalar to vector .
subroutine, public device_masked_gather_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
Gather a masked vector .
subroutine, public device_invcol2(a_d, b_d, n, strm)
Vector division .
subroutine, public device_addsqr2s2(a_d, b_d, c1, n, strm)
Returns .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
subroutine, public device_cpwmin3(a_d, b_d, c, n, strm)
Compute the point-wise minimum of a vector and a scalar .
real(kind=rp) function, public device_glsc2(a_d, b_d, n, strm)
Weighted inner product .
subroutine, public device_cmult2(a_d, b_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_col3(a_d, b_d, c_d, n, strm)
Vector multiplication with 3 vectors .
subroutine, public device_addcol4(a_d, b_d, c_d, d_d, n, strm)
Returns .
subroutine, public device_cfill(a_d, c, n, strm)
Set all elements to a constant c .
subroutine, public device_cpwmin2(a_d, c, n, strm)
Compute the point-wise minimum of a vector and a scalar .
subroutine, public device_add3(a_d, b_d, c_d, n, strm)
Vector addition .
real(kind=rp) function, public device_glmin(a_d, n, strm)
Min of a vector of length n.
subroutine, public device_addcol3s2(a_d, b_d, c_d, s, n, strm)
Returns .
subroutine, public device_pwmax2(a_d, b_d, n, strm)
Compute the point-wise maximum of two vectors .
subroutine device_radd(a_d, c, n, strm)
Add a scalar to vector .
Device abstraction, common interface for various accelerators.
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
integer, parameter, public c_rp
integer, parameter, public rp
Global precision used in computations.