43  use, 
intrinsic :: iso_c_binding
 
  107#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL) 
 
  122#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL) 
 
  138    character(len=*), 
intent(inout) :: name
 
 
  164    type(c_ptr), 
intent(inout) :: x_d
 
  165    integer(c_size_t) :: s
 
  169       call neko_error(
'Memory allocation on device failed')
 
  173       call neko_error(
'Memory allocation on device failed')
 
  178       call neko_error(
'Memory allocation on device failed')
 
 
  185    type(c_ptr), 
intent(inout) :: x_d
 
  188       call neko_error(
'Memory deallocation on device failed')
 
  192       call neko_error(
'Memory deallocation on device failed')
 
  196       call neko_error(
'Memory deallocation on device failed')
 
 
  204    integer, 
intent(in) :: n
 
  205    class(*), 
intent(inout), 
target :: x(:)
 
  206    type(c_ptr), 
intent(inout) :: x_d
 
  207    integer, 
intent(in), 
value :: dir
 
  209    type(c_ptr), 
optional :: strm
 
  210    type(c_ptr) :: ptr_h, copy_stream
 
  211    integer(c_size_t) :: s
 
  213    if (
present(strm)) 
then 
  223      type is (
integer(i8))
 
  229      type is (double precision)
 
 
  242    integer, 
intent(in) :: n
 
  243    class(*), 
intent(inout), 
target :: x(:,:)
 
  244    type(c_ptr), 
intent(inout) :: x_d
 
  245    integer, 
intent(in), 
value :: dir
 
  247    type(c_ptr), 
optional :: strm
 
  248    type(c_ptr) :: ptr_h, copy_stream
 
  249    integer(c_size_t) :: s
 
  251    if (
present(strm)) 
then 
  261      type is (
integer(i8))
 
  267      type is (double precision)
 
 
  280    integer, 
intent(in) :: n
 
  281    class(*), 
intent(inout), 
target :: x(:,:,:)
 
  282    type(c_ptr), 
intent(inout) :: x_d
 
  283    integer, 
intent(in), 
value :: dir
 
  285    type(c_ptr), 
optional :: strm
 
  286    type(c_ptr) :: ptr_h, copy_stream
 
  287    integer(c_size_t) :: s
 
  289    if (
present(strm)) 
then 
  299      type is (
integer(i8))
 
  305      type is (double precision)
 
 
  318    integer, 
intent(in) :: n
 
  319    class(*), 
intent(inout), 
target :: x(:,:,:,:)
 
  320    type(c_ptr), 
intent(inout) :: x_d
 
  321    integer, 
intent(in), 
value :: dir
 
  323    type(c_ptr), 
optional :: strm
 
  324    type(c_ptr) :: ptr_h, copy_stream
 
  325    integer(c_size_t) :: s
 
  327    if (
present(strm)) 
then 
  337      type is (
integer(i8))
 
  343      type is (double precision)
 
 
  358    type(c_ptr), 
intent(inout) :: dst
 
  359    type(c_ptr), 
intent(inout) :: src
 
  360    integer(c_size_t), 
intent(in) :: s
 
  361    integer, 
intent(in), 
value :: dir
 
  362    logical, 
optional :: sync
 
  363    type(c_ptr), 
optional :: strm
 
  364    type(c_ptr) :: copy_stream
 
  365    logical :: sync_device
 
  367    if (
present(sync)) 
then 
  370       sync_device = .false.
 
  373    if (
present(strm)) 
then 
 
  387    type(c_ptr), 
intent(inout) :: ptr_h
 
  388    type(c_ptr), 
intent(inout) :: x_d
 
  389    integer(c_size_t), 
intent(in) :: s
 
  390    integer, 
intent(in), 
value :: dir
 
  391    logical, 
intent(in) :: sync_device
 
  392    type(c_ptr), 
intent(inout) :: stream
 
  397          call neko_error(
'Device memcpy async (host-to-device) failed')
 
  402          call neko_error(
'Device memcpy async (device-to-host) failed')
 
  407          call neko_error(
'Device memcpy async (device-to-device) failed')
 
  410       call neko_error(
'Device memcpy failed (invalid direction')
 
  412    if (sync_device) 
then 
  419          call neko_error(
'Device memcpy async (host-to-device) failed')
 
  424          call neko_error(
'Device memcpy async (device-to-host) failed')
 
  429          call neko_error(
'Device memcpy async (device-to-device) failed')
 
  432       call neko_error(
'Device memcpy failed (invalid direction')
 
  434    if (sync_device) 
then 
  438    if (sync_device) 
then 
  441                                   ptr_h, 0, c_null_ptr, c_null_ptr) &
 
  443             call neko_error(
'Device memcpy (host-to-device) failed')
 
  447                                  0, c_null_ptr, c_null_ptr) &
 
  449             call neko_error(
'Device memcpy (device-to-host) failed')
 
  453                                  0, c_null_ptr, c_null_ptr) &
 
  455             call neko_error(
'Device memcpy (device-to-device) failed')
 
  458          call neko_error(
'Device memcpy failed (invalid direction')
 
  463                                   ptr_h, 0, c_null_ptr, c_null_ptr) &
 
  465             call neko_error(
'Device memcpy (host-to-device) failed')
 
  469                                  0, c_null_ptr, c_null_ptr) &
 
  471             call neko_error(
'Device memcpy (device-to-host) failed')
 
  475                                  0, c_null_ptr, c_null_ptr) &
 
  477             call neko_error(
'Device memcpy (device-to-device) failed')
 
  480          call neko_error(
'Device memcpy failed (invalid direction')
 
 
  488    class(*), 
intent(inout), 
target :: x(:)
 
  489    type(c_ptr), 
intent(inout) :: x_d
 
  490    type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
 
  494       htbl_ptr_h%ptr = c_loc(x)
 
  495      type is (
integer(i8))
 
  496       htbl_ptr_h%ptr = c_loc(x)
 
  498       htbl_ptr_h%ptr = c_loc(x)
 
  499      type is (double precision)
 
  500       htbl_ptr_h%ptr = c_loc(x)
 
 
  513    class(*), 
intent(inout), 
target :: x(:,:)
 
  514    type(c_ptr), 
intent(inout) :: x_d
 
  515    type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
 
  519       htbl_ptr_h%ptr = c_loc(x)
 
  520      type is (
integer(i8))
 
  521       htbl_ptr_h%ptr = c_loc(x)
 
  523       htbl_ptr_h%ptr = c_loc(x)
 
  524      type is (double precision)
 
  525       htbl_ptr_h%ptr = c_loc(x)
 
 
  538    class(*), 
intent(inout), 
target :: x(:,:,:)
 
  539    type(c_ptr), 
intent(inout) :: x_d
 
  540    type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
 
  544       htbl_ptr_h%ptr = c_loc(x)
 
  545      type is (
integer(i8))
 
  546       htbl_ptr_h%ptr = c_loc(x)
 
  548       htbl_ptr_h%ptr = c_loc(x)
 
  549      type is (double precision)
 
  550       htbl_ptr_h%ptr = c_loc(x)
 
 
  563    class(*), 
intent(inout), 
target :: x(:,:,:,:)
 
  564    type(c_ptr), 
intent(inout) :: x_d
 
  565    type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
 
  569       htbl_ptr_h%ptr = c_loc(x)
 
  570      type is (
integer(i8))
 
  571       htbl_ptr_h%ptr = c_loc(x)
 
  573       htbl_ptr_h%ptr = c_loc(x)
 
  574      type is (double precision)
 
  575       htbl_ptr_h%ptr = c_loc(x)
 
 
  588    class(*), 
intent(inout), 
target :: x(:)
 
  589    type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
 
  593       htbl_ptr_h%ptr = c_loc(x)
 
  594      type is (
integer(i8))
 
  595       htbl_ptr_h%ptr = c_loc(x)
 
  597       htbl_ptr_h%ptr = c_loc(x)
 
  598      type is (double precision)
 
  599       htbl_ptr_h%ptr = c_loc(x)
 
 
  612    class(*), 
intent(inout), 
target :: x(:,:)
 
  613    type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
 
  617       htbl_ptr_h%ptr = c_loc(x)
 
  618      type is (
integer(i8))
 
  619       htbl_ptr_h%ptr = c_loc(x)
 
  621       htbl_ptr_h%ptr = c_loc(x)
 
  622      type is (double precision)
 
  623       htbl_ptr_h%ptr = c_loc(x)
 
 
  636    class(*), 
intent(inout), 
target :: x(:,:,:)
 
  637    type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
 
  641       htbl_ptr_h%ptr = c_loc(x)
 
  642      type is (
integer(i8))
 
  643       htbl_ptr_h%ptr = c_loc(x)
 
  645       htbl_ptr_h%ptr = c_loc(x)
 
  646      type is (double precision)
 
  647       htbl_ptr_h%ptr = c_loc(x)
 
 
  660    class(*), 
intent(inout), 
target :: x(:,:,:,:)
 
  661    type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
 
  665       htbl_ptr_h%ptr = c_loc(x)
 
  666      type is (
integer(i8))
 
  667       htbl_ptr_h%ptr = c_loc(x)
 
  669       htbl_ptr_h%ptr = c_loc(x)
 
  670      type is (double precision)
 
  671       htbl_ptr_h%ptr = c_loc(x)
 
 
  684    integer, 
intent(in) :: n
 
  685    class(*), 
intent(inout), 
target :: x(:)
 
  686    type(c_ptr), 
intent(inout) :: x_d
 
  687    integer(c_size_t) :: s
 
  689    if (c_associated(x_d)) 
then 
  690       call neko_error(
'Device pointer already associated')
 
  696      type is (
integer(i8))
 
  700      type is (double precision)
 
 
  713    integer, 
intent(in) :: n
 
  714    class(*), 
intent(inout), 
target :: x(:,:)
 
  715    type(c_ptr), 
intent(inout) :: x_d
 
  716    integer(c_size_t) :: s
 
  718    if (c_associated(x_d)) 
then 
  719       call neko_error(
'Device pointer already associated')
 
  725      type is (
integer(i8))
 
  729      type is (double precision)
 
 
  742    integer, 
intent(in) :: n
 
  743    class(*), 
intent(inout), 
target :: x(:,:,:)
 
  744    type(c_ptr), 
intent(inout) :: x_d
 
  745    integer(c_size_t) :: s
 
  747    if (c_associated(x_d)) 
then 
  748       call neko_error(
'Device pointer already associated')
 
  754      type is (
integer(i8))
 
  758      type is (double precision)
 
 
  771    integer, 
intent(in) :: n
 
  772    class(*), 
intent(inout), 
target :: x(:,:,:,:)
 
  773    type(c_ptr), 
intent(inout) :: x_d
 
  774    integer(c_size_t) :: s
 
  776    if (c_associated(x_d)) 
then 
  777       call neko_error(
'Device pointer already associated')
 
  783      type is (
integer(i8))
 
  787      type is (double precision)
 
 
  800    class(*), 
intent(inout), 
target :: x(:)
 
  801    type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
 
  806       htbl_ptr_h%ptr = c_loc(x)
 
  807      type is (
integer(
i8))
 
  808       htbl_ptr_h%ptr = c_loc(x)
 
  810       htbl_ptr_h%ptr = c_loc(x)
 
  811      type is (double precision)
 
  812       htbl_ptr_h%ptr = c_loc(x)
 
 
  827    class(*), 
intent(inout), 
target :: x(:,:)
 
  828    type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
 
  833       htbl_ptr_h%ptr = c_loc(x)
 
  834      type is (
integer(
i8))
 
  835       htbl_ptr_h%ptr = c_loc(x)
 
  837       htbl_ptr_h%ptr = c_loc(x)
 
  838      type is (double precision)
 
  839       htbl_ptr_h%ptr = c_loc(x)
 
 
  854    class(*), 
intent(inout), 
target :: x(:,:,:)
 
  855    type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
 
  860       htbl_ptr_h%ptr = c_loc(x)
 
  861      type is (
integer(
i8))
 
  862       htbl_ptr_h%ptr = c_loc(x)
 
  864       htbl_ptr_h%ptr = c_loc(x)
 
  865      type is (double precision)
 
  866       htbl_ptr_h%ptr = c_loc(x)
 
 
  881    class(*), 
intent(inout), 
target :: x(:,:,:,:)
 
  882    type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
 
  887       htbl_ptr_h%ptr = c_loc(x)
 
  888      type is (
integer(
i8))
 
  889       htbl_ptr_h%ptr = c_loc(x)
 
  891       htbl_ptr_h%ptr = c_loc(x)
 
  892      type is (double precision)
 
  893       htbl_ptr_h%ptr = c_loc(x)
 
 
  908    class(*), 
intent(in), 
target :: x(:)
 
  909    type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
 
  916       htbl_ptr_h%ptr = c_loc(x)
 
  917      type is (
integer(
i8))
 
  918       htbl_ptr_h%ptr = c_loc(x)
 
  920       htbl_ptr_h%ptr = c_loc(x)
 
  921      type is (double precision)
 
  922       htbl_ptr_h%ptr = c_loc(x)
 
  930       call neko_error(
'Array not associated with device')
 
 
  936    class(*), 
intent(in), 
target :: x(:,:)
 
  937    type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
 
  944       htbl_ptr_h%ptr = c_loc(x)
 
  945      type is (
integer(
i8))
 
  946       htbl_ptr_h%ptr = c_loc(x)
 
  948       htbl_ptr_h%ptr = c_loc(x)
 
  949      type is (double precision)
 
  950       htbl_ptr_h%ptr = c_loc(x)
 
  958       call neko_error(
'Array not associated with device')
 
 
  964    class(*), 
intent(in), 
target :: x(:,:,:)
 
  965    type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
 
  972       htbl_ptr_h%ptr = c_loc(x)
 
  973      type is (
integer(
i8))
 
  974       htbl_ptr_h%ptr = c_loc(x)
 
  976       htbl_ptr_h%ptr = c_loc(x)
 
  977      type is (double precision)
 
  978       htbl_ptr_h%ptr = c_loc(x)
 
  986       call neko_error(
'Array not associated with device')
 
 
  992    class(*), 
intent(in), 
target :: x(:,:,:,:)
 
  993    type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
 
 1000       htbl_ptr_h%ptr = c_loc(x)
 
 1001      type is (
integer(
i8))
 
 1002       htbl_ptr_h%ptr = c_loc(x)
 
 1004       htbl_ptr_h%ptr = c_loc(x)
 
 1005      type is (double precision)
 
 1006       htbl_ptr_h%ptr = c_loc(x)
 
 1014       call neko_error(
'Array not associated with device')
 
 
 1037    type(c_ptr), 
intent(in) :: stream
 
 
 1055    type(c_ptr), 
intent(inout) :: stream
 
 1056    integer, 
optional :: flags
 
 1059    if (
present(flags)) 
then 
 1061          call neko_error(
'Error during stream create (w. flags)')
 
 1065          call neko_error(
'Error during stream create')
 
 1069    if (
present(flags)) 
then 
 1071          call neko_error(
'Error during stream create (w. flags)')
 
 1075          call neko_error(
'Error during stream create')
 
 1081       call neko_error(
'Error during stream create')
 
 
 1088    type(c_ptr), 
intent(inout) :: stream
 
 1089    integer, 
intent(in) :: flags, prio
 
 1092       call neko_error(
'Error during stream create (w. priority)')
 
 1096       call neko_error(
'Error during stream create (w. priority)')
 
 
 1105    type(c_ptr), 
intent(inout) :: stream
 
 1108       call neko_error(
'Error during stream destroy')
 
 1112       call neko_error(
'Error during stream destroy')
 
 1116       call neko_error(
'Error during stream destroy')
 
 
 1123    type(c_ptr), 
intent(in) :: stream
 
 1124    type(c_ptr), 
target, 
intent(in) :: event
 
 
 1164    type(c_ptr), 
intent(inout) :: event
 
 1165    integer, 
optional :: flags
 
 1168    if (
present(flags)) 
then 
 1170          call neko_error(
'Error during event create (w. flags)')
 
 1178    if (
present(flags)) 
then 
 1180          call neko_error(
'Error during event create (w. flags)')
 
 
 1194    type(c_ptr), 
intent(inout) :: event
 
 1197       call neko_error(
'Error during event destroy')
 
 1201       call neko_error(
'Error during event destroy')
 
 
 1210    type(c_ptr), 
target, 
intent(in) :: event
 
 1211    type(c_ptr), 
intent(in) :: stream
 
 
 1229    type(c_ptr), 
target, 
intent(in) :: event
 
 1239    if (c_associated(event)) 
then 
 
Associate a Fortran array to a (allocated) device pointer.
 
Check if a Fortran array is assoicated with a device pointer.
 
Deassociate a Fortran array from a device pointer.
 
Return the device pointer for an associated Fortran array.
 
Map a Fortran array to a device (allocate and associate)
 
Copy data between host and device (or device and device)
 
Synchronize a device or stream.
 
subroutine cuda_device_name(name)
 
@ cudamemcpydevicetodevice
 
integer function cuda_device_count()
Return the number of avaialble CUDA devices.
 
type(c_ptr), bind(C) glb_cmd_queue
Global HIP command queue.
 
Device abstraction, common interface for various accelerators.
 
subroutine, public device_event_record(event, stream)
Record a device event.
 
subroutine, public device_event_sync(event)
Synchronize an event.
 
subroutine device_associate_r2(x, x_d)
Associate a Fortran rank 2 array to a (allocated) device pointer.
 
subroutine, public device_finalize
 
integer, parameter, public device_to_device
 
type(c_ptr) function device_get_ptr_r4(x)
Return the device pointer for an associated Fortran rank 4 array.
 
type(c_ptr) function device_get_ptr_r1(x)
Return the device pointer for an associated Fortran rank 1 array.
 
integer, parameter, public host_to_device
 
subroutine device_map_r3(x, x_d, n)
Map a Fortran rank 3 array to a device (allocate and associate)
 
subroutine, private device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
Copy data between host and device.
 
logical function device_associated_r3(x)
Check if a Fortran rank 3 array is assoicated with a device pointer.
 
type(htable_cptr_t), private device_addrtbl
Table of host to device address mappings.
 
subroutine, public device_profiler_stop()
Stop device profiling.
 
subroutine device_deassociate_r3(x)
Deassociate a Fortran rank 3 array from a device pointer.
 
subroutine, public device_sync_stream(stream)
Synchronize a device stream.
 
type(c_ptr) function device_get_ptr_r3(x)
Return the device pointer for an associated Fortran rank 3 array.
 
subroutine, public device_profiler_start()
Start device profiling.
 
subroutine device_map_r2(x, x_d, n)
Map a Fortran rank 2 array to a device (allocate and associate)
 
subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 2 arrays)
 
subroutine device_map_r4(x, x_d, n)
Map a Fortran rank 4 array to a device (allocate and associate)
 
subroutine, public device_free(x_d)
Deallocate memory on the device.
 
integer, parameter, public device_to_host
 
subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
Copy data between host and device (or device and device) (c-pointers)
 
subroutine, public device_event_destroy(event)
Destroy a device event.
 
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
 
subroutine, public device_stream_create(stream, flags)
Create a device stream/command queue.
 
subroutine device_deassociate_r4(x)
Deassociate a Fortran rank 4 array from a device pointer.
 
subroutine device_sync_device()
Synchronize the device.
 
subroutine device_associate_r1(x, x_d)
Associate a Fortran rank 1 array to a (allocated) device pointer.
 
subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 4 arrays)
 
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
 
subroutine device_map_r1(x, x_d, n)
Map a Fortran rank 1 array to a device (allocate and associate)
 
subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 1 arrays)
 
subroutine device_stream_create_with_priority(stream, flags, prio)
Create a device stream/command queue with priority.
 
subroutine, public device_event_create(event, flags)
Create a device event queue.
 
subroutine, public device_name(name)
 
logical function device_associated_r4(x)
Check if a Fortran rank 4 array is assoicated with a device pointer.
 
integer function device_count()
Return the number of available devices.
 
logical function device_associated_r2(x)
Check if a Fortran rank 2 array is assoicated with a device pointer.
 
type(c_ptr) function device_get_ptr_r2(x)
Return the device pointer for an associated Fortran rank 2 array.
 
subroutine device_associate_r4(x, x_d)
Associate a Fortran rank 4 array to a (allocated) device pointer.
 
subroutine device_deassociate_r1(x)
Deassociate a Fortran rank 1 array from a device pointer.
 
subroutine device_deassociate_r2(x)
Deassociate a Fortran rank 2 array from a device pointer.
 
subroutine, public device_init
 
subroutine device_associate_r3(x, x_d)
Associate a Fortran rank 3 array to a (allocated) device pointer.
 
logical function device_associated_r1(x)
Check if a Fortran rank 1 array is assoicated with a device pointer.
 
subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 3 arrays)
 
subroutine, public device_stream_destroy(stream)
Destroy a device stream/command queue.
 
subroutine hip_device_name(name)
 
@ hipmemcpydevicetodevice
 
integer function hip_device_count()
Return the number of available HIP devices.
 
Implements a hash table ADT.
 
integer, parameter, public i8
 
Fortran OpenCL interface.
 
subroutine opencl_device_name(name)
 
integer function opencl_device_count()
Return the number of OpenCL devices.
 
subroutine opencl_finalize
 
OpenCL JIT program library.
 
subroutine opencl_prgm_lib_release
 
C pointer based hash table.