43 use,
intrinsic :: iso_c_binding
128#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
144 call neko_error(
'Only one device is supported per MPI rank')
150#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
166 character(len=*),
intent(inout) :: name
192 type(c_ptr),
intent(inout) :: x_d
193 integer(c_size_t) :: s
203 call neko_error(
'Memory allocation on device failed')
207 call neko_error(
'Memory allocation on device failed')
212 call neko_error(
'Memory allocation on device failed')
219 type(c_ptr),
intent(inout) :: x_d
222 call neko_error(
'Memory deallocation on device failed')
226 call neko_error(
'Memory deallocation on device failed')
230 call neko_error(
'Memory deallocation on device failed')
238 type(c_ptr),
intent(inout) :: x_d
239 integer(c_int),
target,
value :: v
240 integer(c_size_t),
intent(in) :: s
241 logical,
optional :: sync
242 type(c_ptr),
optional :: strm
243 type(c_ptr) :: stream
244 logical :: sync_device
246 if (
present(sync))
then
249 sync_device = .false.
252 if (
present(strm))
then
268 s, 0, c_null_ptr, c_null_ptr) .ne.
cl_success)
then
273 if (sync_device)
then
281 integer,
intent(in) :: n
282 class(*),
intent(inout),
target :: x(:)
283 type(c_ptr),
intent(inout) :: x_d
284 integer,
intent(in),
value :: dir
286 type(c_ptr),
optional :: strm
287 type(c_ptr) :: ptr_h, copy_stream
288 integer(c_size_t) :: s
290 if (
present(strm))
then
298 s = n * int(4, c_size_t)
300 type is (
integer(i8))
301 s = n * int(8, c_size_t)
304 s = n * int(4, c_size_t)
306 type is (double precision)
307 s = n * int(8, c_size_t)
319 integer,
intent(in) :: n
320 class(*),
intent(inout),
target :: x(:,:)
321 type(c_ptr),
intent(inout) :: x_d
322 integer,
intent(in),
value :: dir
324 type(c_ptr),
optional :: strm
325 type(c_ptr) :: ptr_h, copy_stream
326 integer(c_size_t) :: s
328 if (
present(strm))
then
336 s = n * int(4, c_size_t)
338 type is (
integer(i8))
339 s = n * int(8, c_size_t)
342 s = n * int(4, c_size_t)
344 type is (double precision)
345 s = n * int(8, c_size_t)
357 integer,
intent(in) :: n
358 class(*),
intent(inout),
target :: x(:,:,:)
359 type(c_ptr),
intent(inout) :: x_d
360 integer,
intent(in),
value :: dir
362 type(c_ptr),
optional :: strm
363 type(c_ptr) :: ptr_h, copy_stream
364 integer(c_size_t) :: s
366 if (
present(strm))
then
374 s = n * int(4, c_size_t)
376 type is (
integer(i8))
377 s = n * int(8, c_size_t)
380 s = n * int(4, c_size_t)
382 type is (double precision)
383 s = n * int(8, c_size_t)
395 integer,
intent(in) :: n
396 class(*),
intent(inout),
target :: x(:,:,:,:)
397 type(c_ptr),
intent(inout) :: x_d
398 integer,
intent(in),
value :: dir
400 type(c_ptr),
optional :: strm
401 type(c_ptr) :: ptr_h, copy_stream
402 integer(c_size_t) :: s
404 if (
present(strm))
then
412 s = n * int(4, c_size_t)
414 type is (
integer(i8))
415 s = n * int(8, c_size_t)
418 s = n * int(4, c_size_t)
420 type is (double precision)
421 s = n * int(8, c_size_t)
435 type(c_ptr),
intent(inout) :: dst
436 type(c_ptr),
intent(inout) :: src
437 integer(c_size_t),
intent(in) :: s
438 integer,
intent(in),
value :: dir
439 logical,
optional :: sync
440 type(c_ptr),
optional :: strm
441 type(c_ptr) :: copy_stream
442 logical :: sync_device
444 if (
present(sync))
then
447 sync_device = .false.
450 if (
present(strm))
then
464 type(c_ptr),
intent(inout) :: ptr_h
465 type(c_ptr),
intent(inout) :: x_d
466 integer(c_size_t),
intent(in) :: s
467 integer,
intent(in),
value :: dir
468 logical,
intent(in) :: sync_device
469 type(c_ptr),
intent(inout) :: stream
472 if (sync_device)
then
482 call neko_error(
'Device memcpy async (host-to-device) failed')
487 call neko_error(
'Device memcpy async (device-to-host) failed')
492 call neko_error(
'Device memcpy async (device-to-device) failed')
495 call neko_error(
'Device memcpy failed (invalid direction')
497 if (sync_device)
then
504 call neko_error(
'Device memcpy async (host-to-device) failed')
509 call neko_error(
'Device memcpy async (device-to-host) failed')
514 call neko_error(
'Device memcpy async (device-to-device) failed')
517 call neko_error(
'Device memcpy failed (invalid direction')
519 if (sync_device)
then
523 if (sync_device)
then
526 ptr_h, 0, c_null_ptr, c_null_ptr) &
528 call neko_error(
'Device memcpy (host-to-device) failed')
532 0, c_null_ptr, c_null_ptr) &
534 call neko_error(
'Device memcpy (device-to-host) failed')
538 0, c_null_ptr, c_null_ptr) &
540 call neko_error(
'Device memcpy (device-to-device) failed')
543 call neko_error(
'Device memcpy failed (invalid direction')
548 ptr_h, 0, c_null_ptr, c_null_ptr) &
550 call neko_error(
'Device memcpy (host-to-device) failed')
554 0, c_null_ptr, c_null_ptr) &
556 call neko_error(
'Device memcpy (device-to-host) failed')
560 0, c_null_ptr, c_null_ptr) &
562 call neko_error(
'Device memcpy (device-to-device) failed')
565 call neko_error(
'Device memcpy failed (invalid direction')
573 class(*),
intent(inout),
target :: x(:)
574 type(c_ptr),
intent(inout) :: x_d
575 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
579 htbl_ptr_h%ptr = c_loc(x)
580 type is (
integer(i8))
581 htbl_ptr_h%ptr = c_loc(x)
583 htbl_ptr_h%ptr = c_loc(x)
584 type is (double precision)
585 htbl_ptr_h%ptr = c_loc(x)
598 class(*),
intent(inout),
target :: x(:,:)
599 type(c_ptr),
intent(inout) :: x_d
600 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
604 htbl_ptr_h%ptr = c_loc(x)
605 type is (
integer(i8))
606 htbl_ptr_h%ptr = c_loc(x)
608 htbl_ptr_h%ptr = c_loc(x)
609 type is (double precision)
610 htbl_ptr_h%ptr = c_loc(x)
623 class(*),
intent(inout),
target :: x(:,:,:)
624 type(c_ptr),
intent(inout) :: x_d
625 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
629 htbl_ptr_h%ptr = c_loc(x)
630 type is (
integer(i8))
631 htbl_ptr_h%ptr = c_loc(x)
633 htbl_ptr_h%ptr = c_loc(x)
634 type is (double precision)
635 htbl_ptr_h%ptr = c_loc(x)
648 class(*),
intent(inout),
target :: x(:,:,:,:)
649 type(c_ptr),
intent(inout) :: x_d
650 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
654 htbl_ptr_h%ptr = c_loc(x)
655 type is (
integer(i8))
656 htbl_ptr_h%ptr = c_loc(x)
658 htbl_ptr_h%ptr = c_loc(x)
659 type is (double precision)
660 htbl_ptr_h%ptr = c_loc(x)
673 class(*),
intent(inout),
target :: x(:)
674 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
678 htbl_ptr_h%ptr = c_loc(x)
679 type is (
integer(i8))
680 htbl_ptr_h%ptr = c_loc(x)
682 htbl_ptr_h%ptr = c_loc(x)
683 type is (double precision)
684 htbl_ptr_h%ptr = c_loc(x)
697 class(*),
intent(inout),
target :: x(:,:)
698 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
702 htbl_ptr_h%ptr = c_loc(x)
703 type is (
integer(i8))
704 htbl_ptr_h%ptr = c_loc(x)
706 htbl_ptr_h%ptr = c_loc(x)
707 type is (double precision)
708 htbl_ptr_h%ptr = c_loc(x)
721 class(*),
intent(inout),
target :: x(:,:,:)
722 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
726 htbl_ptr_h%ptr = c_loc(x)
727 type is (
integer(i8))
728 htbl_ptr_h%ptr = c_loc(x)
730 htbl_ptr_h%ptr = c_loc(x)
731 type is (double precision)
732 htbl_ptr_h%ptr = c_loc(x)
745 class(*),
intent(inout),
target :: x(:,:,:,:)
746 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
750 htbl_ptr_h%ptr = c_loc(x)
751 type is (
integer(i8))
752 htbl_ptr_h%ptr = c_loc(x)
754 htbl_ptr_h%ptr = c_loc(x)
755 type is (double precision)
756 htbl_ptr_h%ptr = c_loc(x)
769 integer,
intent(in) :: n
770 class(*),
intent(inout),
target :: x(:)
771 type(c_ptr),
intent(inout) :: x_d
772 integer(c_size_t) :: s
774 if (c_associated(x_d))
then
775 call neko_error(
'Device pointer already associated')
780 s = n * int(4, c_size_t)
781 type is (
integer(i8))
782 s = n * int(8, c_size_t)
784 s = n * int(4, c_size_t)
785 type is (double precision)
786 s = n * int(8, c_size_t)
798 integer,
intent(in) :: n
799 class(*),
intent(inout),
target :: x(:,:)
800 type(c_ptr),
intent(inout) :: x_d
801 integer(c_size_t) :: s
803 if (c_associated(x_d))
then
804 call neko_error(
'Device pointer already associated')
809 s = n * int(4, c_size_t)
810 type is (
integer(i8))
811 s = n * int(8, c_size_t)
813 s = n * int(4, c_size_t)
814 type is (double precision)
815 s = n * int(8, c_size_t)
827 integer,
intent(in) :: n
828 class(*),
intent(inout),
target :: x(:,:,:)
829 type(c_ptr),
intent(inout) :: x_d
830 integer(c_size_t) :: s
832 if (c_associated(x_d))
then
833 call neko_error(
'Device pointer already associated')
838 s = n * int(4, c_size_t)
839 type is (
integer(i8))
840 s = n * int(8, c_size_t)
842 s = n * int(4, c_size_t)
843 type is (double precision)
844 s = n * int(8, c_size_t)
856 integer,
intent(in) :: n
857 class(*),
intent(inout),
target :: x(:,:,:,:)
858 type(c_ptr),
intent(inout) :: x_d
859 integer(c_size_t) :: s
861 if (c_associated(x_d))
then
862 call neko_error(
'Device pointer already associated')
867 s = n * int(4, c_size_t)
868 type is (
integer(i8))
869 s = n * int(8, c_size_t)
871 s = n * int(4, c_size_t)
872 type is (double precision)
873 s = n * int(8, c_size_t)
885 class(*),
intent(inout),
target :: x(:)
886 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
891 htbl_ptr_h%ptr = c_loc(x)
892 type is (
integer(
i8))
893 htbl_ptr_h%ptr = c_loc(x)
895 htbl_ptr_h%ptr = c_loc(x)
896 type is (double precision)
897 htbl_ptr_h%ptr = c_loc(x)
912 class(*),
intent(inout),
target :: x(:,:)
913 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
918 htbl_ptr_h%ptr = c_loc(x)
919 type is (
integer(
i8))
920 htbl_ptr_h%ptr = c_loc(x)
922 htbl_ptr_h%ptr = c_loc(x)
923 type is (double precision)
924 htbl_ptr_h%ptr = c_loc(x)
939 class(*),
intent(inout),
target :: x(:,:,:)
940 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
945 htbl_ptr_h%ptr = c_loc(x)
946 type is (
integer(
i8))
947 htbl_ptr_h%ptr = c_loc(x)
949 htbl_ptr_h%ptr = c_loc(x)
950 type is (double precision)
951 htbl_ptr_h%ptr = c_loc(x)
966 class(*),
intent(inout),
target :: x(:,:,:,:)
967 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
972 htbl_ptr_h%ptr = c_loc(x)
973 type is (
integer(
i8))
974 htbl_ptr_h%ptr = c_loc(x)
976 htbl_ptr_h%ptr = c_loc(x)
977 type is (double precision)
978 htbl_ptr_h%ptr = c_loc(x)
993 class(*),
intent(in),
target :: x(:)
994 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1001 htbl_ptr_h%ptr = c_loc(x)
1002 type is (
integer(
i8))
1003 htbl_ptr_h%ptr = c_loc(x)
1005 htbl_ptr_h%ptr = c_loc(x)
1006 type is (double precision)
1007 htbl_ptr_h%ptr = c_loc(x)
1015 call neko_error(
'Array not associated with device')
1021 class(*),
intent(in),
target :: x(:,:)
1022 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1029 htbl_ptr_h%ptr = c_loc(x)
1030 type is (
integer(
i8))
1031 htbl_ptr_h%ptr = c_loc(x)
1033 htbl_ptr_h%ptr = c_loc(x)
1034 type is (double precision)
1035 htbl_ptr_h%ptr = c_loc(x)
1043 call neko_error(
'Array not associated with device')
1049 class(*),
intent(in),
target :: x(:,:,:)
1050 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1057 htbl_ptr_h%ptr = c_loc(x)
1058 type is (
integer(
i8))
1059 htbl_ptr_h%ptr = c_loc(x)
1061 htbl_ptr_h%ptr = c_loc(x)
1062 type is (double precision)
1063 htbl_ptr_h%ptr = c_loc(x)
1071 call neko_error(
'Array not associated with device')
1077 class(*),
intent(in),
target :: x(:,:,:,:)
1078 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1085 htbl_ptr_h%ptr = c_loc(x)
1086 type is (
integer(
i8))
1087 htbl_ptr_h%ptr = c_loc(x)
1089 htbl_ptr_h%ptr = c_loc(x)
1090 type is (double precision)
1091 htbl_ptr_h%ptr = c_loc(x)
1099 call neko_error(
'Array not associated with device')
1122 type(c_ptr),
intent(in) :: stream
1140 type(c_ptr),
intent(inout) :: stream
1141 integer,
optional :: flags
1144 if (
present(flags))
then
1146 call neko_error(
'Error during stream create (w. flags)')
1150 call neko_error(
'Error during stream create')
1154 if (
present(flags))
then
1156 call neko_error(
'Error during stream create (w. flags)')
1160 call neko_error(
'Error during stream create')
1166 call neko_error(
'Error during stream create')
1173 type(c_ptr),
intent(inout) :: stream
1174 integer,
intent(in) :: flags, prio
1177 call neko_error(
'Error during stream create (w. priority)')
1181 call neko_error(
'Error during stream create (w. priority)')
1190 type(c_ptr),
intent(inout) :: stream
1193 call neko_error(
'Error during stream destroy')
1197 call neko_error(
'Error during stream destroy')
1201 call neko_error(
'Error during stream destroy')
1208 type(c_ptr),
intent(in) :: stream
1209 type(c_ptr),
target,
intent(in) :: event
1249 type(c_ptr),
intent(inout) :: event
1250 integer,
optional :: flags
1253 if (
present(flags))
then
1255 call neko_error(
'Error during event create (w. flags)')
1263 if (
present(flags))
then
1265 call neko_error(
'Error during event create (w. flags)')
1279 type(c_ptr),
intent(inout) :: event
1282 call neko_error(
'Error during event destroy')
1286 call neko_error(
'Error during event destroy')
1295 type(c_ptr),
target,
intent(in) :: event
1296 type(c_ptr),
intent(in) :: stream
1314 type(c_ptr),
target,
intent(in) :: event
1324 if (c_associated(event))
then
Associate a Fortran array to a (allocated) device pointer.
Check if a Fortran array is assoicated with a device pointer.
Deassociate a Fortran array from a device pointer.
Return the device pointer for an associated Fortran array.
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
Synchronize a device or stream.
subroutine cuda_device_name(name)
subroutine cuda_finalize(glb_cmd_queue, aux_cmd_queue)
@ cudamemcpydevicetodevice
integer function cuda_device_count()
Return the number of avaialble CUDA devices.
subroutine cuda_init(glb_cmd_queue, aux_cmd_queue, strm_high_prio, strm_low_prio)
Device abstraction, common interface for various accelerators.
subroutine, public device_event_record(event, stream)
Record a device event.
subroutine, public device_event_sync(event)
Synchronize an event.
subroutine device_associate_r2(x, x_d)
Associate a Fortran rank 2 array to a (allocated) device pointer.
subroutine, public device_finalize
integer, parameter, public device_to_device
type(c_ptr) function device_get_ptr_r4(x)
Return the device pointer for an associated Fortran rank 4 array.
type(c_ptr) function device_get_ptr_r1(x)
Return the device pointer for an associated Fortran rank 1 array.
integer, public strm_low_prio
Low priority stream setting.
integer, parameter, public host_to_device
subroutine device_map_r3(x, x_d, n)
Map a Fortran rank 3 array to a device (allocate and associate)
type(c_ptr), bind(C), public prf_cmd_queue
Profiling command queue.
subroutine, private device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
Copy data between host and device.
logical function device_associated_r3(x)
Check if a Fortran rank 3 array is assoicated with a device pointer.
type(htable_cptr_t), private device_addrtbl
Table of host to device address mappings.
subroutine, public device_profiler_stop()
Stop device profiling.
subroutine device_deassociate_r3(x)
Deassociate a Fortran rank 3 array from a device pointer.
subroutine, public device_sync_stream(stream)
Synchronize a device stream.
type(c_ptr) function device_get_ptr_r3(x)
Return the device pointer for an associated Fortran rank 3 array.
subroutine, public device_profiler_start()
Start device profiling.
subroutine device_map_r2(x, x_d, n)
Map a Fortran rank 2 array to a device (allocate and associate)
subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 2 arrays)
subroutine device_map_r4(x, x_d, n)
Map a Fortran rank 4 array to a device (allocate and associate)
subroutine, public device_free(x_d)
Deallocate memory on the device.
integer, parameter, public device_to_host
subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
Copy data between host and device (or device and device) (c-pointers)
subroutine, public device_event_destroy(event)
Destroy a device event.
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
subroutine, public device_stream_create_with_priority(stream, flags, prio)
Create a device stream/command queue with priority.
subroutine, public device_stream_create(stream, flags)
Create a device stream/command queue.
subroutine device_deassociate_r4(x)
Deassociate a Fortran rank 4 array from a device pointer.
subroutine device_sync_device()
Synchronize the device.
subroutine device_associate_r1(x, x_d)
Associate a Fortran rank 1 array to a (allocated) device pointer.
subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 4 arrays)
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
subroutine device_map_r1(x, x_d, n)
Map a Fortran rank 1 array to a device (allocate and associate)
subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 1 arrays)
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
subroutine, public device_event_create(event, flags)
Create a device event queue.
integer function, public device_count()
Return the number of available devices.
subroutine, public device_name(name)
logical function device_associated_r4(x)
Check if a Fortran rank 4 array is assoicated with a device pointer.
logical function device_associated_r2(x)
Check if a Fortran rank 2 array is assoicated with a device pointer.
integer, public strm_high_prio
High priority stream setting.
type(c_ptr), bind(C), public aux_cmd_queue
Aux command queue.
type(c_ptr) function device_get_ptr_r2(x)
Return the device pointer for an associated Fortran rank 2 array.
subroutine device_associate_r4(x, x_d)
Associate a Fortran rank 4 array to a (allocated) device pointer.
subroutine device_deassociate_r1(x)
Deassociate a Fortran rank 1 array from a device pointer.
type(c_ptr), bind(C), public glb_cmd_event
Event for the global command queue.
subroutine device_deassociate_r2(x)
Deassociate a Fortran rank 2 array from a device pointer.
subroutine, public device_init
subroutine device_associate_r3(x, x_d)
Associate a Fortran rank 3 array to a (allocated) device pointer.
logical function device_associated_r1(x)
Check if a Fortran rank 1 array is assoicated with a device pointer.
subroutine, public device_memset(x_d, v, s, sync, strm)
Set memory on the device to a value.
subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 3 arrays)
subroutine, public device_stream_destroy(stream)
Destroy a device stream/command queue.
subroutine hip_device_name(name)
@ hipmemcpydevicetodevice
subroutine hip_init(glb_cmd_queue, aux_cmd_queue, strm_high_prio, strm_low_prio)
subroutine hip_finalize(glb_cmd_queue, aux_cmd_queue)
integer function hip_device_count()
Return the number of available HIP devices.
Implements a hash table ADT.
integer, parameter neko_bcknd_device
integer, parameter, public i8
Fortran OpenCL interface.
subroutine opencl_device_name(name)
subroutine opencl_finalize(glb_cmd_queue, aux_cmd_queue, prf_cmd_queue)
integer function opencl_device_count()
Return the number of OpenCL devices.
subroutine opencl_init(glb_cmd_queue, aux_cmd_queue, prf_cmd_queue)
OpenCL JIT program library.
subroutine, public opencl_prgm_lib_release
C pointer based hash table.