34 use,
intrinsic :: iso_c_binding, only: c_ptr, c_int
38 use mpi_f08,
only : mpi_sum, mpi_in_place, mpi_allreduce
81 type(c_ptr) :: a_d, b_d
83 type(c_ptr),
optional :: strm
88 if (
present(strm))
then
101 call neko_error(
'no device backend configured')
107 type(c_ptr) :: a_d, b_d, mask_d
109 type(c_ptr),
optional :: strm
112 if (n .lt. 1 .or. n_mask .lt. 1)
return
114 if (
present(strm))
then
127 call neko_error(
'no device backend configured')
133 type(c_ptr) :: a_d, b_d, mask_d
135 type(c_ptr),
optional :: strm
138 if (n .lt. 1 .or. n_mask .lt. 1)
return
140 if (
present(strm))
then
153 call neko_error(
'no device backend configured')
159 type(c_ptr) :: a_d, b_d, mask_d
161 type(c_ptr),
optional :: strm
164 if (n .lt. 1 .or. n_mask .lt. 1)
return
166 if (
present(strm))
then
179 call neko_error(
'no device backend configured')
184 type(c_ptr) :: a_d, b_d, mask_d
186 type(c_ptr),
optional :: strm
189 if (n .lt. 1 .or. n_mask .lt. 1)
return
191 if (
present(strm))
then
202 call neko_error(
'No OpenCL bcknd, masked atomic reduction')
204 call neko_error(
'no device backend configured')
212 real(kind=
rp),
intent(in) :: c
214 type(c_ptr) :: mask_d
216 type(c_ptr),
optional :: strm
219 if (n .lt. 1 .or. n_mask .lt. 1)
return
221 if (
present(strm))
then
234 call neko_error(
'No device backend configured')
242 type(c_ptr),
optional :: strm
247 if (
present(strm))
then
260 call neko_error(
'No device backend configured')
268 type(c_ptr),
optional :: strm
270 real(kind=
rp),
parameter :: one = 1.0_rp
274 if (
present(strm))
then
280#if HAVE_HIP || HAVE_CUDA || HAVE_OPENCL
283 call neko_error(
'No device backend configured')
290 real(kind=
rp),
intent(in) :: c
292 type(c_ptr),
optional :: strm
297 if (
present(strm))
then
310 call neko_error(
'No device backend configured')
316 type(c_ptr) :: a_d, b_d
317 real(kind=
rp),
intent(in) :: c
319 type(c_ptr),
optional :: strm
324 if (
present(strm))
then
337 call neko_error(
'No device backend configured')
344 real(kind=
rp),
intent(in) :: c
346 type(c_ptr),
optional :: strm
349 if (
present(strm))
then
362 call neko_error(
'No device backend configured')
368 type(c_ptr) :: a_d, b_d
369 real(kind=
rp),
intent(in) :: c
371 type(c_ptr),
optional :: strm
374 if (
present(strm))
then
387 call neko_error(
'No device backend configured')
394 real(kind=
rp),
intent(in) :: c
396 type(c_ptr),
optional :: strm
401 if (
present(strm))
then
414 call neko_error(
'No device backend configured')
422 real(kind=
rp),
intent(in) :: c
424 type(c_ptr),
optional :: strm
429 if (
present(strm))
then
442 call neko_error(
'No device backend configured')
449 real(kind=
rp),
intent(in) :: c
451 type(c_ptr),
optional ::strm
456 if (
present(strm))
then
469 call neko_error(
'No device backend configured')
475 type(c_ptr) :: a_d, b_d
477 type(c_ptr),
optional :: strm
482 if (
present(strm))
then
495 call neko_error(
'No device backend configured')
500 type(c_ptr) :: a_d, b_d, c_d, d_d
502 type(c_ptr),
optional :: strm
507 if (
present(strm))
then
514 call hip_add4(a_d, b_d, c_d, d_d, n, strm_)
516 call cuda_add4(a_d, b_d, c_d, d_d, n, strm_)
520 call neko_error(
'No device backend configured')
525 type(c_ptr) :: a_d, b_d
528 type(c_ptr),
optional :: strm
533 if (
present(strm))
then
546 call neko_error(
'No device backend configured')
553 type(c_ptr) :: a_d, b_d
556 type(c_ptr),
optional :: strm
561 if (
present(strm))
then
574 call neko_error(
'No device backend configured')
580 type(c_ptr) :: a_d, b_d
583 type(c_ptr),
optional :: strm
588 if (
present(strm))
then
601 call neko_error(
'No device backend configured')
607 type(c_ptr) :: a_d, b_d, c_d
609 type(c_ptr),
optional :: strm
614 if (
present(strm))
then
621 call hip_add3(a_d, b_d, c_d, n, strm_)
627 call neko_error(
'No device backend configured')
633 type(c_ptr) :: a_d, b_d, c_d
634 real(kind=
rp) :: c1, c2
636 type(c_ptr),
optional :: strm
641 if (
present(strm))
then
648 call hip_add3s2(a_d, b_d, c_d, c1, c2, n, strm_)
654 call neko_error(
'No device backend configured')
662 type(c_ptr),
optional :: strm
667 if (
present(strm))
then
680 call neko_error(
'No device backend configured')
686 type(c_ptr) :: a_d, b_d
688 type(c_ptr),
optional :: strm
693 if (
present(strm))
then
706 call neko_error(
'No device backend configured')
712 type(c_ptr) :: a_d, b_d, c_d
714 type(c_ptr),
optional :: strm
717 if (
present(strm))
then
729 call neko_error(
'opencl_invcol3 not implemented')
731 call neko_error(
'No device backend configured')
737 type(c_ptr) :: a_d, b_d
739 type(c_ptr),
optional :: strm
742 if (
present(strm))
then
756 call neko_error(
'No device backend configured')
762 type(c_ptr) :: a_d, b_d, c_d
764 type(c_ptr),
optional :: strm
769 if (
present(strm))
then
776 call hip_col3(a_d, b_d, c_d, n, strm_)
782 call neko_error(
'No device backend configured')
788 type(c_ptr) :: a_d, b_d, c_d
790 type(c_ptr),
optional :: strm
795 if (
present(strm))
then
808 call neko_error(
'No device backend configured')
814 type(c_ptr) :: a_d, b_d
816 type(c_ptr),
optional :: strm
821 if (
present(strm))
then
834 call neko_error(
'No device backend configured')
840 type(c_ptr) :: a_d, b_d, c_d
842 type(c_ptr),
optional :: strm
847 if (
present(strm))
then
854 call hip_sub3(a_d, b_d, c_d, n, strm_)
860 call neko_error(
'No device backend configured')
866 type(c_ptr) :: a_d, b_d, c_d
868 type(c_ptr),
optional :: strm
873 if (
present(strm))
then
886 call neko_error(
'No device backend configured')
892 type(c_ptr) :: a_d, b_d, c_d, d_d
894 type(c_ptr),
optional :: strm
899 if (
present(strm))
then
912 call neko_error(
'No device backend configured')
918 subroutine device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm)
919 type(c_ptr) :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
921 type(c_ptr),
optional :: strm
926 if (
present(strm))
then
933 call hip_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm_)
935 call cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm_)
937 call opencl_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm_)
939 call neko_error(
'No device backend configured')
946 w1_d, w2_d, w3_d, n, strm)
947 type(c_ptr) :: u1_d, u2_d, u3_d
948 type(c_ptr) :: v1_d, v2_d, v3_d
949 type(c_ptr) :: w1_d, w2_d, w3_d
951 type(c_ptr),
optional :: strm
956 if (
present(strm))
then
963 call hip_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
964 w1_d, w2_d, w3_d, n, strm_)
966 call cuda_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
967 w1_d, w2_d, w3_d, n, strm_)
970 w1_d, w2_d, w3_d, n, strm_)
972 call neko_error(
'No device backend configured')
979 type(c_ptr) :: u_d, v_d, w_d
981 type(c_ptr),
optional :: strm
989 if (
present(strm))
then
1003 call neko_error(
'No device backend configured')
1009 type(c_ptr) :: a_d, b_d, c_d
1011 type(c_ptr),
optional :: strm
1012 type(c_ptr) :: strm_
1013 real(kind=
rp) :: res
1015 if (
present(strm))
then
1023 res =
hip_glsc3(a_d, b_d, c_d, n, strm_)
1029 call neko_error(
'No device backend configured')
1032#ifndef HAVE_DEVICE_MPI
1034 call mpi_allreduce(mpi_in_place, res, 1, &
1041 type(c_ptr),
value :: w_d, v_d_d, mult_d
1042 integer(c_int) :: j, n
1044 type(c_ptr),
optional :: strm
1045 type(c_ptr) :: strm_
1048 if (
present(strm))
then
1061 call neko_error(
'No device backend configured')
1064#ifndef HAVE_DEVICE_MPI
1066 call mpi_allreduce(mpi_in_place, h, j, &
1073 type(c_ptr),
value :: y_d, x_d_d, a_d
1074 integer(c_int) :: j, n
1075 type(c_ptr),
optional :: strm
1076 type(c_ptr) :: strm_
1078 if (n .lt. 1)
return
1080 if (
present(strm))
then
1093 call neko_error(
'No device backend configured')
1099 type(c_ptr) :: a_d, b_d
1101 real(kind=
rp) :: res
1102 type(c_ptr),
optional :: strm
1103 type(c_ptr) :: strm_
1105 if (
present(strm))
then
1119 call neko_error(
'No device backend configured')
1122#ifndef HAVE_DEVICE_MPI
1124 call mpi_allreduce(mpi_in_place, res, 1, &
1133 type(c_ptr),
intent(in) :: a_d, b_d
1134 integer,
intent(in) :: n
1136 real(kind=
rp) :: res
1137 type(c_ptr),
optional :: strm
1138 type(c_ptr) :: strm_
1140 if (
present(strm))
then
1154 call neko_error(
'No device backend configured')
1157#ifndef HAVE_DEVICE_MPI
1159 call mpi_allreduce(mpi_in_place, res, 1, &
1171 real(kind=
rp) :: res
1172 type(c_ptr),
optional :: strm
1173 type(c_ptr) :: strm_
1175 if (
present(strm))
then
1189 call neko_error(
'No device backend configured')
1192#ifndef HAVE_DEVICE_MPI
1194 call mpi_allreduce(mpi_in_place, res, 1, &
1201 integer,
intent(in) :: n
1203 type(c_ptr),
optional :: strm
1204 type(c_ptr) :: strm_
1206 if (n .lt. 1)
return
1208 if (
present(strm))
then
1219 call neko_error(
'OPENCL is not implemented for device_absval')
1221 call neko_error(
'No device backend configured')
1232 type(c_ptr) :: a_d, b_d
1234 type(c_ptr),
optional :: strm
1235 type(c_ptr) :: strm_
1237 if (n .lt. 1)
return
1239 if (
present(strm))
then
1250 call neko_error(
'No OpenCL backend for device_pwmax_vec2')
1252 call neko_error(
'No device backend configured')
1259 type(c_ptr) :: a_d, b_d, c_d
1261 type(c_ptr),
optional :: strm
1262 type(c_ptr) :: strm_
1264 if (n .lt. 1)
return
1266 if (
present(strm))
then
1277 call neko_error(
'No OpenCL backend for device_pwmax_vec3')
1279 call neko_error(
'No device backend configured')
1288 real(kind=
rp),
intent(in) :: c
1290 type(c_ptr),
optional :: strm
1291 type(c_ptr) :: strm_
1293 if (n .lt. 1)
return
1295 if (
present(strm))
then
1306 call neko_error(
'No OpenCL backend for device_pwmax_sca2')
1308 call neko_error(
'No device backend configured')
1316 type(c_ptr) :: a_d, b_d
1317 real(kind=
rp),
intent(in) :: c
1319 type(c_ptr),
optional :: strm
1320 type(c_ptr) :: strm_
1322 if (n .lt. 1)
return
1324 if (
present(strm))
then
1335 call neko_error(
'No OpenCL backend for device_pwmax_sca3')
1337 call neko_error(
'No device backend configured')
1348 type(c_ptr) :: a_d, b_d
1350 type(c_ptr),
optional :: strm
1351 type(c_ptr) :: strm_
1353 if (n .lt. 1)
return
1355 if (
present(strm))
then
1366 call neko_error(
'No OpenCL backend for device_pwmin_vec2')
1368 call neko_error(
'No device backend configured')
1375 type(c_ptr) :: a_d, b_d, c_d
1377 type(c_ptr),
optional :: strm
1378 type(c_ptr) :: strm_
1380 if (n .lt. 1)
return
1382 if (
present(strm))
then
1393 call neko_error(
'No OpenCL backend for device_pwmin_vec3')
1395 call neko_error(
'No device backend configured')
1404 real(kind=
rp),
intent(in) :: c
1406 type(c_ptr),
optional :: strm
1407 type(c_ptr) :: strm_
1409 if (n .lt. 1)
return
1411 if (
present(strm))
then
1422 call neko_error(
'No OpenCL backend for device_pwmin_sca2')
1424 call neko_error(
'No device backend configured')
1432 type(c_ptr) :: a_d, b_d
1433 real(kind=
rp),
intent(in) :: c
1435 type(c_ptr),
optional :: strm
1436 type(c_ptr) :: strm_
1438 if (n .lt. 1)
return
1440 if (
present(strm))
then
1451 call neko_error(
'No OpenCL backend for device_pwmin_sca3')
1453 call neko_error(
'No device backend configured')
1463 type(c_ptr),
intent(inout) :: a_d
1464 integer,
intent(in) :: c
1465 integer,
intent(in) :: n
1466 type(c_ptr),
optional :: strm
1467 type(c_ptr) :: strm_
1468 if (n .lt. 1)
return
1470 if (
present(strm))
then
1483 call neko_error(
'No device backend configured')
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
integer, public pe_size
MPI size of communicator.
type(mpi_comm), public neko_comm
MPI communicator.
subroutine device_pwmax_sca2(a_d, c, n, strm)
Compute the point-wise maximum of a vector and a scalar .
subroutine, public device_add2s1(a_d, b_d, c1, n, strm)
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n, strm)
subroutine device_pwmin_sca2(a_d, c, n, strm)
Compute the point-wise minimum of a vector and a scalar .
subroutine, public device_sub3(a_d, b_d, c_d, n, strm)
Vector subtraction .
subroutine, public device_masked_scatter_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
Scatter a masked vector .
subroutine, public device_add2s2(a_d, b_d, c1, n, strm)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
subroutine, public device_addcol3(a_d, b_d, c_d, n, strm)
Returns .
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_invcol1(a_d, n, strm)
Invert a vector .
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n, strm)
Returns .
subroutine, public device_masked_atomic_reduction_0(a_d, b_d, mask_d, n, n_mask, strm)
subroutine, public device_rzero(a_d, n, strm)
Zero a real vector.
subroutine, public device_rone(a_d, n, strm)
Set all elements to one.
subroutine, public device_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, w1_d, w2_d, w3_d, n, strm)
Compute a cross product (3-d version) assuming vector components etc.
subroutine device_pwmin_sca3(a_d, b_d, c, n, strm)
Compute the point-wise minimum of a vector and a scalar .
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n, strm)
Compute a dot product (3-d version) assuming vector components etc.
real(kind=rp) function, public device_glsubnorm(a_d, b_d, n, strm)
Returns the norm of the difference of two vectors .
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n, strm)
subroutine, public device_cfill_mask(a_d, c, n, mask_d, n_mask, strm)
Fill a constant to a masked vector. .
subroutine, public device_cadd2(a_d, b_d, c, n, strm)
Add a scalar to vector .
subroutine, public device_sub2(a_d, b_d, n, strm)
Vector substraction .
subroutine device_pwmax_vec3(a_d, b_d, c_d, n, strm)
Compute the point-wise maximum of two vectors .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
subroutine, public device_invcol3(a_d, b_d, c_d, n, strm)
Vector division .
subroutine device_pwmin_vec3(a_d, b_d, c_d, n, strm)
Compute the point-wise minimum of two vectors .
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n, strm)
Compute multiplication sum .
subroutine device_pwmax_vec2(a_d, b_d, n, strm)
Compute the point-wise maximum of two vectors .
subroutine, public device_cdiv2(a_d, b_d, c, n, strm)
Division of constant c by array .
subroutine, public device_add4(a_d, b_d, c_d, d_d, n, strm)
subroutine, public device_masked_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
Copy a masked vector .
subroutine, public device_subcol3(a_d, b_d, c_d, n, strm)
Returns .
subroutine, public device_cdiv(a_d, c, n, strm)
Division of constant c by array .
subroutine, public device_absval(a_d, n, strm)
subroutine device_iadd(a_d, c, n, strm)
Add an integer scalar to vector .
subroutine, public device_masked_gather_copy_0(a_d, b_d, mask_d, n, n_mask, strm)
Gather a masked vector .
subroutine, public device_invcol2(a_d, b_d, n, strm)
Vector division .
subroutine, public device_addsqr2s2(a_d, b_d, c1, n, strm)
Returns .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n, strm)
Weighted inner product .
subroutine device_pwmin_vec2(a_d, b_d, n, strm)
Compute the point-wise minimum of two vectors .
real(kind=rp) function, public device_glsc2(a_d, b_d, n, strm)
Weighted inner product .
subroutine, public device_cmult2(a_d, b_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_col3(a_d, b_d, c_d, n, strm)
Vector multiplication with 3 vectors .
subroutine, public device_addcol4(a_d, b_d, c_d, d_d, n, strm)
Returns .
subroutine, public device_cfill(a_d, c, n, strm)
Set all elements to a constant c .
subroutine, public device_add3(a_d, b_d, c_d, n, strm)
Vector addition .
subroutine device_pwmax_sca3(a_d, b_d, c, n, strm)
Compute the point-wise maximum of a vector and a scalar .
subroutine device_radd(a_d, c, n, strm)
Add a scalar to vector .
Device abstraction, common interface for various accelerators.
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
integer, parameter, public c_rp
integer, parameter, public rp
Global precision used in computations.