42 use,
intrinsic :: iso_c_binding
122#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
137#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
153 character(len=*),
intent(inout) :: name
179 type(c_ptr),
intent(inout) :: x_d
180 integer(c_size_t) :: s
184 call neko_error(
'Memory allocation on device failed')
188 call neko_error(
'Memory allocation on device failed')
193 call neko_error(
'Memory allocation on device failed')
200 type(c_ptr),
intent(inout) :: x_d
203 call neko_error(
'Memory deallocation on device failed')
207 call neko_error(
'Memory deallocation on device failed')
211 call neko_error(
'Memory deallocation on device failed')
219 integer,
intent(in) :: n
220 class(*),
intent(inout),
target :: x(:)
221 type(c_ptr),
intent(inout) :: x_d
222 integer,
intent(in),
value :: dir
224 type(c_ptr),
optional :: strm
225 type(c_ptr) :: ptr_h, copy_stream
226 integer(c_size_t) :: s
228 if (
present(strm))
then
238 type is (
integer(i8))
244 type is (double precision)
257 integer,
intent(in) :: n
258 class(*),
intent(inout),
target :: x(:,:)
259 type(c_ptr),
intent(inout) :: x_d
260 integer,
intent(in),
value :: dir
262 type(c_ptr),
optional :: strm
263 type(c_ptr) :: ptr_h, copy_stream
264 integer(c_size_t) :: s
266 if (
present(strm))
then
276 type is (
integer(i8))
282 type is (double precision)
295 integer,
intent(in) :: n
296 class(*),
intent(inout),
target :: x(:,:,:)
297 type(c_ptr),
intent(inout) :: x_d
298 integer,
intent(in),
value :: dir
300 type(c_ptr),
optional :: strm
301 type(c_ptr) :: ptr_h, copy_stream
302 integer(c_size_t) :: s
304 if (
present(strm))
then
314 type is (
integer(i8))
320 type is (double precision)
333 integer,
intent(in) :: n
334 class(*),
intent(inout),
target :: x(:,:,:,:)
335 type(c_ptr),
intent(inout) :: x_d
336 integer,
intent(in),
value :: dir
338 type(c_ptr),
optional :: strm
339 type(c_ptr) :: ptr_h, copy_stream
340 integer(c_size_t) :: s
342 if (
present(strm))
then
352 type is (
integer(i8))
358 type is (double precision)
373 type(c_ptr),
intent(inout) :: dst
374 type(c_ptr),
intent(inout) :: src
375 integer(c_size_t),
intent(in) :: s
376 integer,
intent(in),
value :: dir
377 logical,
optional :: sync
378 type(c_ptr),
optional :: strm
379 type(c_ptr) :: copy_stream
380 logical :: sync_device
382 if (
present(sync))
then
385 sync_device = .false.
388 if (
present(strm))
then
402 type(c_ptr),
intent(inout) :: ptr_h
403 type(c_ptr),
intent(inout) :: x_d
404 integer(c_size_t),
intent(in) :: s
405 integer,
intent(in),
value :: dir
406 logical,
intent(in) :: sync_device
407 type(c_ptr),
intent(inout) :: stream
412 call neko_error(
'Device memcpy async (host-to-device) failed')
417 call neko_error(
'Device memcpy async (device-to-host) failed')
422 call neko_error(
'Device memcpy async (device-to-device) failed')
425 call neko_error(
'Device memcpy failed (invalid direction')
427 if (sync_device)
then
434 call neko_error(
'Device memcpy async (host-to-device) failed')
439 call neko_error(
'Device memcpy async (device-to-host) failed')
444 call neko_error(
'Device memcpy async (device-to-device) failed')
447 call neko_error(
'Device memcpy failed (invalid direction')
449 if (sync_device)
then
453 if (sync_device)
then
456 ptr_h, 0, c_null_ptr, c_null_ptr) &
458 call neko_error(
'Device memcpy (host-to-device) failed')
462 0, c_null_ptr, c_null_ptr) &
464 call neko_error(
'Device memcpy (device-to-host) failed')
468 0, c_null_ptr, c_null_ptr) &
470 call neko_error(
'Device memcpy (device-to-device) failed')
473 call neko_error(
'Device memcpy failed (invalid direction')
478 ptr_h, 0, c_null_ptr, c_null_ptr) &
480 call neko_error(
'Device memcpy (host-to-device) failed')
484 0, c_null_ptr, c_null_ptr) &
486 call neko_error(
'Device memcpy (device-to-host) failed')
490 0, c_null_ptr, c_null_ptr) &
492 call neko_error(
'Device memcpy (device-to-device) failed')
495 call neko_error(
'Device memcpy failed (invalid direction')
503 class(*),
intent(inout),
target :: x(:)
504 type(c_ptr),
intent(inout) :: x_d
505 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
509 htbl_ptr_h%ptr = c_loc(x)
510 type is (
integer(i8))
511 htbl_ptr_h%ptr = c_loc(x)
513 htbl_ptr_h%ptr = c_loc(x)
514 type is (double precision)
515 htbl_ptr_h%ptr = c_loc(x)
528 class(*),
intent(inout),
target :: x(:,:)
529 type(c_ptr),
intent(inout) :: x_d
530 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
534 htbl_ptr_h%ptr = c_loc(x)
535 type is (
integer(i8))
536 htbl_ptr_h%ptr = c_loc(x)
538 htbl_ptr_h%ptr = c_loc(x)
539 type is (double precision)
540 htbl_ptr_h%ptr = c_loc(x)
553 class(*),
intent(inout),
target :: x(:,:,:)
554 type(c_ptr),
intent(inout) :: x_d
555 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
559 htbl_ptr_h%ptr = c_loc(x)
560 type is (
integer(i8))
561 htbl_ptr_h%ptr = c_loc(x)
563 htbl_ptr_h%ptr = c_loc(x)
564 type is (double precision)
565 htbl_ptr_h%ptr = c_loc(x)
578 class(*),
intent(inout),
target :: x(:,:,:,:)
579 type(c_ptr),
intent(inout) :: x_d
580 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
584 htbl_ptr_h%ptr = c_loc(x)
585 type is (
integer(i8))
586 htbl_ptr_h%ptr = c_loc(x)
588 htbl_ptr_h%ptr = c_loc(x)
589 type is (double precision)
590 htbl_ptr_h%ptr = c_loc(x)
603 class(*),
intent(inout),
target :: x(:)
604 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
608 htbl_ptr_h%ptr = c_loc(x)
609 type is (
integer(i8))
610 htbl_ptr_h%ptr = c_loc(x)
612 htbl_ptr_h%ptr = c_loc(x)
613 type is (double precision)
614 htbl_ptr_h%ptr = c_loc(x)
627 class(*),
intent(inout),
target :: x(:,:)
628 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
632 htbl_ptr_h%ptr = c_loc(x)
633 type is (
integer(i8))
634 htbl_ptr_h%ptr = c_loc(x)
636 htbl_ptr_h%ptr = c_loc(x)
637 type is (double precision)
638 htbl_ptr_h%ptr = c_loc(x)
651 class(*),
intent(inout),
target :: x(:,:,:)
652 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
656 htbl_ptr_h%ptr = c_loc(x)
657 type is (
integer(i8))
658 htbl_ptr_h%ptr = c_loc(x)
660 htbl_ptr_h%ptr = c_loc(x)
661 type is (double precision)
662 htbl_ptr_h%ptr = c_loc(x)
675 class(*),
intent(inout),
target :: x(:,:,:,:)
676 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
680 htbl_ptr_h%ptr = c_loc(x)
681 type is (
integer(i8))
682 htbl_ptr_h%ptr = c_loc(x)
684 htbl_ptr_h%ptr = c_loc(x)
685 type is (double precision)
686 htbl_ptr_h%ptr = c_loc(x)
699 integer,
intent(in) :: n
700 class(*),
intent(inout),
target :: x(:)
701 type(c_ptr),
intent(inout) :: x_d
702 integer(c_size_t) :: s
704 if (c_associated(x_d))
then
705 call neko_error(
'Device pointer already associated')
711 type is (
integer(i8))
715 type is (double precision)
728 integer,
intent(in) :: n
729 class(*),
intent(inout),
target :: x(:,:)
730 type(c_ptr),
intent(inout) :: x_d
731 integer(c_size_t) :: s
733 if (c_associated(x_d))
then
734 call neko_error(
'Device pointer already associated')
740 type is (
integer(i8))
744 type is (double precision)
757 integer,
intent(in) :: n
758 class(*),
intent(inout),
target :: x(:,:,:)
759 type(c_ptr),
intent(inout) :: x_d
760 integer(c_size_t) :: s
762 if (c_associated(x_d))
then
763 call neko_error(
'Device pointer already associated')
769 type is (
integer(i8))
773 type is (double precision)
786 integer,
intent(in) :: n
787 class(*),
intent(inout),
target :: x(:,:,:,:)
788 type(c_ptr),
intent(inout) :: x_d
789 integer(c_size_t) :: s
791 if (c_associated(x_d))
then
792 call neko_error(
'Device pointer already associated')
798 type is (
integer(i8))
802 type is (double precision)
815 class(*),
intent(inout),
target :: x(:)
816 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
821 htbl_ptr_h%ptr = c_loc(x)
822 type is (
integer(
i8))
823 htbl_ptr_h%ptr = c_loc(x)
825 htbl_ptr_h%ptr = c_loc(x)
826 type is (double precision)
827 htbl_ptr_h%ptr = c_loc(x)
842 class(*),
intent(inout),
target :: x(:,:)
843 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
848 htbl_ptr_h%ptr = c_loc(x)
849 type is (
integer(
i8))
850 htbl_ptr_h%ptr = c_loc(x)
852 htbl_ptr_h%ptr = c_loc(x)
853 type is (double precision)
854 htbl_ptr_h%ptr = c_loc(x)
869 class(*),
intent(inout),
target :: x(:,:,:)
870 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
875 htbl_ptr_h%ptr = c_loc(x)
876 type is (
integer(
i8))
877 htbl_ptr_h%ptr = c_loc(x)
879 htbl_ptr_h%ptr = c_loc(x)
880 type is (double precision)
881 htbl_ptr_h%ptr = c_loc(x)
896 class(*),
intent(inout),
target :: x(:,:,:,:)
897 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
902 htbl_ptr_h%ptr = c_loc(x)
903 type is (
integer(
i8))
904 htbl_ptr_h%ptr = c_loc(x)
906 htbl_ptr_h%ptr = c_loc(x)
907 type is (double precision)
908 htbl_ptr_h%ptr = c_loc(x)
923 class(*),
intent(in),
target :: x(:)
924 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
931 htbl_ptr_h%ptr = c_loc(x)
932 type is (
integer(
i8))
933 htbl_ptr_h%ptr = c_loc(x)
935 htbl_ptr_h%ptr = c_loc(x)
936 type is (double precision)
937 htbl_ptr_h%ptr = c_loc(x)
945 call neko_error(
'Array not associated with device')
951 class(*),
intent(in),
target :: x(:,:)
952 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
959 htbl_ptr_h%ptr = c_loc(x)
960 type is (
integer(
i8))
961 htbl_ptr_h%ptr = c_loc(x)
963 htbl_ptr_h%ptr = c_loc(x)
964 type is (double precision)
965 htbl_ptr_h%ptr = c_loc(x)
973 call neko_error(
'Array not associated with device')
979 class(*),
intent(in),
target :: x(:,:,:)
980 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
987 htbl_ptr_h%ptr = c_loc(x)
988 type is (
integer(
i8))
989 htbl_ptr_h%ptr = c_loc(x)
991 htbl_ptr_h%ptr = c_loc(x)
992 type is (double precision)
993 htbl_ptr_h%ptr = c_loc(x)
1001 call neko_error(
'Array not associated with device')
1007 class(*),
intent(in),
target :: x(:,:,:,:)
1008 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1015 htbl_ptr_h%ptr = c_loc(x)
1016 type is (
integer(
i8))
1017 htbl_ptr_h%ptr = c_loc(x)
1019 htbl_ptr_h%ptr = c_loc(x)
1020 type is (double precision)
1021 htbl_ptr_h%ptr = c_loc(x)
1029 call neko_error(
'Array not associated with device')
1052 type(c_ptr),
intent(in) :: stream
1070 type(c_ptr),
intent(inout) :: stream
1071 integer,
optional :: flags
1074 if (
present(flags))
then
1076 call neko_error(
'Error during stream create (w. flags)')
1080 call neko_error(
'Error during stream create')
1084 if (
present(flags))
then
1086 call neko_error(
'Error during stream create (w. flags)')
1090 call neko_error(
'Error during stream create')
1096 call neko_error(
'Error during stream create')
1103 type(c_ptr),
intent(inout) :: stream
1104 integer,
intent(in) :: flags, prio
1107 call neko_error(
'Error during stream create (w. priority)')
1111 call neko_error(
'Error during stream create (w. priority)')
1120 type(c_ptr),
intent(inout) :: stream
1123 call neko_error(
'Error during stream destroy')
1127 call neko_error(
'Error during stream destroy')
1131 call neko_error(
'Error during stream destroy')
1138 type(c_ptr),
intent(in) :: stream
1139 type(c_ptr),
target,
intent(in) :: event
1179 type(c_ptr),
intent(inout) :: event
1180 integer,
optional :: flags
1183 if (
present(flags))
then
1185 call neko_error(
'Error during event create (w. flags)')
1193 if (
present(flags))
then
1195 call neko_error(
'Error during event create (w. flags)')
1209 type(c_ptr),
intent(inout) :: event
1212 call neko_error(
'Error during event destroy')
1216 call neko_error(
'Error during event destroy')
1225 type(c_ptr),
target,
intent(in) :: event
1226 type(c_ptr),
intent(in) :: stream
1244 type(c_ptr),
target,
intent(in) :: event
1254 if (c_associated(event))
then
Associate a Fortran array to a (allocated) device pointer.
Check if a Fortran array is assoicated with a device pointer.
Deassociate a Fortran array from a device pointer.
Return the device pointer for an associated Fortran array.
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
Synchronize a device or stream.
subroutine cuda_device_name(name)
subroutine cuda_finalize(glb_cmd_queue, aux_cmd_queue)
@ cudamemcpydevicetodevice
integer function cuda_device_count()
Return the number of avaialble CUDA devices.
subroutine cuda_init(glb_cmd_queue, aux_cmd_queue, strm_high_prio, strm_low_prio)
Device abstraction, common interface for various accelerators.
subroutine, public device_event_record(event, stream)
Record a device event.
subroutine, public device_event_sync(event)
Synchronize an event.
subroutine device_associate_r2(x, x_d)
Associate a Fortran rank 2 array to a (allocated) device pointer.
subroutine, public device_finalize
integer, parameter, public device_to_device
type(c_ptr) function device_get_ptr_r4(x)
Return the device pointer for an associated Fortran rank 4 array.
type(c_ptr) function device_get_ptr_r1(x)
Return the device pointer for an associated Fortran rank 1 array.
integer, public strm_low_prio
Low priority stream setting.
integer, parameter, public host_to_device
subroutine device_map_r3(x, x_d, n)
Map a Fortran rank 3 array to a device (allocate and associate)
subroutine, private device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
Copy data between host and device.
logical function device_associated_r3(x)
Check if a Fortran rank 3 array is assoicated with a device pointer.
type(htable_cptr_t), private device_addrtbl
Table of host to device address mappings.
subroutine, public device_profiler_stop()
Stop device profiling.
subroutine device_deassociate_r3(x)
Deassociate a Fortran rank 3 array from a device pointer.
subroutine, public device_sync_stream(stream)
Synchronize a device stream.
type(c_ptr) function device_get_ptr_r3(x)
Return the device pointer for an associated Fortran rank 3 array.
subroutine, public device_profiler_start()
Start device profiling.
subroutine device_map_r2(x, x_d, n)
Map a Fortran rank 2 array to a device (allocate and associate)
subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 2 arrays)
subroutine device_map_r4(x, x_d, n)
Map a Fortran rank 4 array to a device (allocate and associate)
subroutine, public device_free(x_d)
Deallocate memory on the device.
integer, parameter, public device_to_host
subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
Copy data between host and device (or device and device) (c-pointers)
subroutine, public device_event_destroy(event)
Destroy a device event.
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
subroutine, public device_stream_create_with_priority(stream, flags, prio)
Create a device stream/command queue with priority.
subroutine, public device_stream_create(stream, flags)
Create a device stream/command queue.
subroutine device_deassociate_r4(x)
Deassociate a Fortran rank 4 array from a device pointer.
subroutine device_sync_device()
Synchronize the device.
subroutine device_associate_r1(x, x_d)
Associate a Fortran rank 1 array to a (allocated) device pointer.
subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 4 arrays)
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
subroutine device_map_r1(x, x_d, n)
Map a Fortran rank 1 array to a device (allocate and associate)
subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 1 arrays)
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
subroutine, public device_event_create(event, flags)
Create a device event queue.
integer function, public device_count()
Return the number of available devices.
subroutine, public device_name(name)
logical function device_associated_r4(x)
Check if a Fortran rank 4 array is assoicated with a device pointer.
logical function device_associated_r2(x)
Check if a Fortran rank 2 array is assoicated with a device pointer.
integer, public strm_high_prio
High priority stream setting.
type(c_ptr), bind(C), public aux_cmd_queue
Aux command queue.
type(c_ptr) function device_get_ptr_r2(x)
Return the device pointer for an associated Fortran rank 2 array.
subroutine device_associate_r4(x, x_d)
Associate a Fortran rank 4 array to a (allocated) device pointer.
subroutine device_deassociate_r1(x)
Deassociate a Fortran rank 1 array from a device pointer.
type(c_ptr), bind(C), public glb_cmd_event
Event for the global command queue.
subroutine device_deassociate_r2(x)
Deassociate a Fortran rank 2 array from a device pointer.
subroutine, public device_init
subroutine device_associate_r3(x, x_d)
Associate a Fortran rank 3 array to a (allocated) device pointer.
logical function device_associated_r1(x)
Check if a Fortran rank 1 array is assoicated with a device pointer.
subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 3 arrays)
subroutine, public device_stream_destroy(stream)
Destroy a device stream/command queue.
subroutine hip_device_name(name)
@ hipmemcpydevicetodevice
subroutine hip_init(glb_cmd_queue, aux_cmd_queue, strm_high_prio, strm_low_prio)
subroutine hip_finalize(glb_cmd_queue, aux_cmd_queue)
integer function hip_device_count()
Return the number of available HIP devices.
Implements a hash table ADT.
integer, parameter, public i8
Fortran OpenCL interface.
subroutine opencl_device_name(name)
integer function opencl_device_count()
Return the number of OpenCL devices.
subroutine opencl_finalize(glb_cmd_queue, aux_cmd_queue)
subroutine opencl_init(glb_cmd_queue, aux_cmd_queue)
OpenCL JIT program library.
subroutine, public opencl_prgm_lib_release
C pointer based hash table.