34 use,
intrinsic :: iso_c_binding, only: c_ptr, c_int
38 use mpi_f08,
only: mpi_sum, mpi_in_place, mpi_allreduce
76 type(c_ptr) :: a_d, b_d
85 call neko_error(
'no device backend configured')
91 type(c_ptr) :: a_d, b_d, mask_d
100 call neko_error(
'no device backend configured')
105 type(c_ptr) :: a_d, b_d, mask_d
112 call neko_error(
'No OpenCL bcknd, masked red copy')
114 call neko_error(
'no device backend configured')
119 type(c_ptr) :: a_d, b_d, mask_d
126 call neko_error(
'No OpenCL bcknd, masked atomic reduction')
128 call neko_error(
'no device backend configured')
136 real(kind=
rp),
intent(in) :: c
138 type(c_ptr) :: mask_d
147 call neko_error(
'No device backend configured')
162 call neko_error(
'No device backend configured')
170 real(kind=
rp),
parameter :: one = 1.0_rp
171#if HAVE_HIP || HAVE_CUDA || HAVE_OPENCL
174 call neko_error(
'No device backend configured')
181 real(kind=
rp),
intent(in) :: c
190 call neko_error(
'No device backend configured')
196 type(c_ptr) :: a_d, b_d
197 real(kind=
rp),
intent(in) :: c
206 call neko_error(
'No device backend configured')
213 real(kind=
rp),
intent(in) :: c
222 call neko_error(
'No device backend configured')
230 real(kind=
rp),
intent(in) :: c
239 call neko_error(
'No device backend configured')
246 real(kind=
rp),
intent(in) :: c
255 call neko_error(
'No device backend configured')
261 type(c_ptr) :: a_d, b_d
270 call neko_error(
'No device backend configured')
275 type(c_ptr) :: a_d, b_d, c_d, d_d
278 call hip_add4(a_d, b_d, c_d, d_d, n)
284 call neko_error(
'No device backend configured')
289 type(c_ptr) :: a_d, b_d
299 call neko_error(
'No device backend configured')
306 type(c_ptr) :: a_d, b_d
316 call neko_error(
'No device backend configured')
322 type(c_ptr) :: a_d, b_d
332 call neko_error(
'No device backend configured')
338 type(c_ptr) :: a_d, b_d, c_d
347 call neko_error(
'No device backend configured')
353 type(c_ptr) :: a_d, b_d, c_d
354 real(kind=
rp) :: c1, c2
363 call neko_error(
'No device backend configured')
378 call neko_error(
'No device backend configured')
384 type(c_ptr) :: a_d, b_d
393 call neko_error(
'No device backend configured')
399 type(c_ptr) :: a_d, b_d
408 call neko_error(
'No device backend configured')
414 type(c_ptr) :: a_d, b_d, c_d
423 call neko_error(
'No device backend configured')
429 type(c_ptr) :: a_d, b_d, c_d
438 call neko_error(
'No device backend configured')
444 type(c_ptr) :: a_d, b_d
453 call neko_error(
'No device backend configured')
459 type(c_ptr) :: a_d, b_d, c_d
468 call neko_error(
'No device backend configured')
474 type(c_ptr) :: a_d, b_d, c_d
483 call neko_error(
'No device backend configured')
489 type(c_ptr) :: a_d, b_d, c_d, d_d
498 call neko_error(
'No device backend configured')
505 type(c_ptr) :: dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d
508 call hip_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
510 call cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
512 call opencl_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
514 call neko_error(
'No device backend configured')
522 type(c_ptr) :: u1_d, u2_d, u3_d
523 type(c_ptr) :: v1_d, v2_d, v3_d
524 type(c_ptr) :: w1_d, w2_d, w3_d
527 call hip_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
530 call cuda_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, &
535 call neko_error(
'No device backend configured')
542 type(c_ptr) :: u_d, v_d, w_d
554 call neko_error(
'No device backend configured')
560 type(c_ptr) :: a_d, b_d, c_d
570 call neko_error(
'No device backend configured')
573#ifndef HAVE_DEVICE_MPI
575 call mpi_allreduce(mpi_in_place, res, 1, &
582 type(c_ptr),
value :: w_d, v_d_d, mult_d
583 integer(c_int) :: j, n
593 call neko_error(
'No device backend configured')
596#ifndef HAVE_DEVICE_MPI
598 call mpi_allreduce(mpi_in_place, h, j, &
605 type(c_ptr),
value :: y_d, x_d_d, a_d
606 integer(c_int) :: j, n
614 call neko_error(
'No device backend configured')
620 type(c_ptr) :: a_d, b_d
630 call neko_error(
'No device backend configured')
633#ifndef HAVE_DEVICE_MPI
635 call mpi_allreduce(mpi_in_place, res, 1, &
653 call neko_error(
'No device backend configured')
656#ifndef HAVE_DEVICE_MPI
658 call mpi_allreduce(mpi_in_place, res, 1, &
665 integer,
intent(in) :: n
672 call neko_error(
'OPENCL is not implemented for device_absval')
674 call neko_error(
'No device backend configured')
685 type(c_ptr) :: a_d, b_d
689 call neko_error(
'No HIP backend for device_pwmax_vec2')
693 call neko_error(
'No OpenCL backend for device_pwmax_vec2')
695 call neko_error(
'No device backend configured')
702 type(c_ptr) :: a_d, b_d, c_d
706 call neko_error(
'No HIP backend for device_pwmax_vec3')
710 call neko_error(
'No OpenCL backend for device_pwmax_vec3')
712 call neko_error(
'No device backend configured')
721 real(kind=
rp),
intent(in) :: c
725 call neko_error(
'No HIP backend for device_pwmax_sca2')
729 call neko_error(
'No OpenCL backend for device_pwmax_sca2')
731 call neko_error(
'No device backend configured')
739 type(c_ptr) :: a_d, b_d
740 real(kind=
rp),
intent(in) :: c
744 call neko_error(
'No HIP backend for device_pwmax_sca3')
748 call neko_error(
'No OpenCL backend for device_pwmax_sca3')
750 call neko_error(
'No device backend configured')
761 type(c_ptr) :: a_d, b_d
765 call neko_error(
'No HIP backend for device_pwmin_vec2')
769 call neko_error(
'No OpenCL backend for device_pwmin_vec2')
771 call neko_error(
'No device backend configured')
778 type(c_ptr) :: a_d, b_d, c_d
782 call neko_error(
'No HIP backend for device_pwmin_vec3')
786 call neko_error(
'No OpenCL backend for device_pwmin_vec3')
788 call neko_error(
'No device backend configured')
797 real(kind=
rp),
intent(in) :: c
801 call neko_error(
'No HIP backend for device_pwmin_sca2')
805 call neko_error(
'No OpenCL backend for device_pwmin_sca2')
807 call neko_error(
'No device backend configured')
815 type(c_ptr) :: a_d, b_d
816 real(kind=
rp),
intent(in) :: c
820 call neko_error(
'No HIP backend for device_pwmin_sca3')
824 call neko_error(
'No OpenCL backend for device_pwmin_sca3')
826 call neko_error(
'No device backend configured')
type(mpi_comm) neko_comm
MPI communicator.
type(mpi_datatype) mpi_real_precision
MPI type for working precision of REAL types.
integer pe_size
MPI size of communicator.
subroutine, public device_add2(a_d, b_d, n)
Vector addition .
subroutine device_pwmax_sca2(a_d, c, n)
Compute the point-wise maximum of a vector and a scalar .
subroutine, public device_addcol3(a_d, b_d, c_d, n)
Returns .
subroutine, public device_col2(a_d, b_d, n)
Vector multiplication .
subroutine, public device_add2s1(a_d, b_d, c1, n)
subroutine, public device_rzero(a_d, n)
Zero a real vector.
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n)
Compute multiplication sum .
subroutine, public device_rone(a_d, n)
Set all elements to one.
subroutine, public device_add2s2(a_d, b_d, c1, n)
Vector addition with scalar multiplication (multiplication on first argument)
subroutine device_pwmax_vec2(a_d, b_d, n)
Compute the point-wise maximum of two vectors .
subroutine, public device_invcol1(a_d, n)
Invert a vector .
subroutine, public device_col3(a_d, b_d, c_d, n)
Vector multiplication with 3 vectors .
subroutine, public device_add4(a_d, b_d, c_d, d_d, n)
subroutine, public device_cadd(a_d, c, n)
Add a scalar to vector .
subroutine, public device_masked_red_copy(a_d, b_d, mask_d, n, m)
subroutine, public device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
Compute a dot product (3-d version) assuming vector components etc.
subroutine, public device_cmult2(a_d, b_d, c, n)
Multiplication by constant c .
subroutine, public device_vcross(u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, w1_d, w2_d, w3_d, n)
Compute a cross product (3-d version) assuming vector components etc.
subroutine, public device_cmult(a_d, c, n)
Multiplication by constant c .
subroutine device_pwmax_sca3(a_d, b_d, c, n)
Compute the point-wise maximum of a vector and a scalar .
subroutine, public device_absval(a_d, n)
subroutine, public device_masked_copy(a_d, b_d, mask_d, n, m)
Copy a masked vector .
subroutine device_pwmax_vec3(a_d, b_d, c_d, n)
Compute the point-wise maximum of two vectors .
subroutine, public device_add2s2_many(y_d, x_d_d, a_d, j, n)
subroutine device_pwmin_sca3(a_d, b_d, c, n)
Compute the point-wise minimum of a vector and a scalar .
subroutine, public device_masked_atomic_reduction(a_d, b_d, mask_d, n, m)
subroutine, public device_cfill_mask(a_d, c, size, mask_d, mask_size)
Fill a constant to a masked vector.
real(kind=rp) function, public device_glsc2(a_d, b_d, n)
Weighted inner product .
subroutine, public device_sub3(a_d, b_d, c_d, n)
Vector subtraction .
real(kind=rp) function, public device_glsc3(a_d, b_d, c_d, n)
Weighted inner product .
subroutine, public device_add3(a_d, b_d, c_d, n)
Vector addition .
subroutine device_pwmin_vec3(a_d, b_d, c_d, n)
Compute the point-wise minimum of two vectors .
real(kind=rp) function, public device_glsum(a_d, n)
Sum a vector of length n.
subroutine, public device_cadd2(a_d, b_d, c, n)
Add a scalar to vector .
subroutine, public device_copy(a_d, b_d, n)
Copy a vector .
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n)
Returns .
subroutine, public device_subcol3(a_d, b_d, c_d, n)
Returns .
subroutine, public device_glsc3_many(h, w_d, v_d_d, mult_d, j, n)
subroutine, public device_sub2(a_d, b_d, n)
Vector subtraction.
subroutine, public device_cfill(a_d, c, n)
Set all elements to a constant c .
subroutine, public device_addcol4(a_d, b_d, c_d, d_d, n)
Returns .
subroutine device_pwmin_sca2(a_d, c, n)
Compute the point-wise minimum of a vector and a scalar .
subroutine device_pwmin_vec2(a_d, b_d, n)
Compute the point-wise minimum of two vectors .
subroutine, public device_invcol2(a_d, b_d, n)
Vector division .
subroutine, public device_addsqr2s2(a_d, b_d, c1, n)
Returns .
integer, parameter, public c_rp
integer, parameter, public rp
Global precision used in computations.