43 use,
intrinsic :: iso_c_binding
123#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
139 call neko_error(
'Only one device is supported per MPI rank')
145#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
161 character(len=*),
intent(inout) :: name
187 type(c_ptr),
intent(inout) :: x_d
188 integer(c_size_t) :: s
198 call neko_error(
'Memory allocation on device failed')
202 call neko_error(
'Memory allocation on device failed')
207 call neko_error(
'Memory allocation on device failed')
214 type(c_ptr),
intent(inout) :: x_d
217 call neko_error(
'Memory deallocation on device failed')
221 call neko_error(
'Memory deallocation on device failed')
225 call neko_error(
'Memory deallocation on device failed')
233 type(c_ptr),
intent(inout) :: x_d
234 integer(c_int),
target,
value :: v
235 integer(c_size_t),
intent(in) :: s
236 logical,
optional :: sync
237 type(c_ptr),
optional :: strm
238 type(c_ptr) :: stream
239 logical :: sync_device
241 if (
present(sync))
then
244 sync_device = .false.
247 if (
present(strm))
then
263 s, 0, c_null_ptr, c_null_ptr) .ne.
cl_success)
then
268 if (sync_device)
then
276 integer,
intent(in) :: n
277 class(*),
intent(inout),
target :: x(:)
278 type(c_ptr),
intent(inout) :: x_d
279 integer,
intent(in),
value :: dir
281 type(c_ptr),
optional :: strm
282 type(c_ptr) :: ptr_h, copy_stream
283 integer(c_size_t) :: s
285 if (
present(strm))
then
293 s = n * int(4, c_size_t)
295 type is (
integer(i8))
296 s = n * int(8, c_size_t)
299 s = n * int(4, c_size_t)
301 type is (double precision)
302 s = n * int(8, c_size_t)
314 integer,
intent(in) :: n
315 class(*),
intent(inout),
target :: x(:,:)
316 type(c_ptr),
intent(inout) :: x_d
317 integer,
intent(in),
value :: dir
319 type(c_ptr),
optional :: strm
320 type(c_ptr) :: ptr_h, copy_stream
321 integer(c_size_t) :: s
323 if (
present(strm))
then
331 s = n * int(4, c_size_t)
333 type is (
integer(i8))
334 s = n * int(8, c_size_t)
337 s = n * int(4, c_size_t)
339 type is (double precision)
340 s = n * int(8, c_size_t)
352 integer,
intent(in) :: n
353 class(*),
intent(inout),
target :: x(:,:,:)
354 type(c_ptr),
intent(inout) :: x_d
355 integer,
intent(in),
value :: dir
357 type(c_ptr),
optional :: strm
358 type(c_ptr) :: ptr_h, copy_stream
359 integer(c_size_t) :: s
361 if (
present(strm))
then
369 s = n * int(4, c_size_t)
371 type is (
integer(i8))
372 s = n * int(8, c_size_t)
375 s = n * int(4, c_size_t)
377 type is (double precision)
378 s = n * int(8, c_size_t)
390 integer,
intent(in) :: n
391 class(*),
intent(inout),
target :: x(:,:,:,:)
392 type(c_ptr),
intent(inout) :: x_d
393 integer,
intent(in),
value :: dir
395 type(c_ptr),
optional :: strm
396 type(c_ptr) :: ptr_h, copy_stream
397 integer(c_size_t) :: s
399 if (
present(strm))
then
407 s = n * int(4, c_size_t)
409 type is (
integer(i8))
410 s = n * int(8, c_size_t)
413 s = n * int(4, c_size_t)
415 type is (double precision)
416 s = n * int(8, c_size_t)
430 type(c_ptr),
intent(inout) :: dst
431 type(c_ptr),
intent(inout) :: src
432 integer(c_size_t),
intent(in) :: s
433 integer,
intent(in),
value :: dir
434 logical,
optional :: sync
435 type(c_ptr),
optional :: strm
436 type(c_ptr) :: copy_stream
437 logical :: sync_device
439 if (
present(sync))
then
442 sync_device = .false.
445 if (
present(strm))
then
459 type(c_ptr),
intent(inout) :: ptr_h
460 type(c_ptr),
intent(inout) :: x_d
461 integer(c_size_t),
intent(in) :: s
462 integer,
intent(in),
value :: dir
463 logical,
intent(in) :: sync_device
464 type(c_ptr),
intent(inout) :: stream
467 if (sync_device)
then
477 call neko_error(
'Device memcpy async (host-to-device) failed')
482 call neko_error(
'Device memcpy async (device-to-host) failed')
487 call neko_error(
'Device memcpy async (device-to-device) failed')
490 call neko_error(
'Device memcpy failed (invalid direction')
492 if (sync_device)
then
499 call neko_error(
'Device memcpy async (host-to-device) failed')
504 call neko_error(
'Device memcpy async (device-to-host) failed')
509 call neko_error(
'Device memcpy async (device-to-device) failed')
512 call neko_error(
'Device memcpy failed (invalid direction')
514 if (sync_device)
then
518 if (sync_device)
then
521 ptr_h, 0, c_null_ptr, c_null_ptr) &
523 call neko_error(
'Device memcpy (host-to-device) failed')
527 0, c_null_ptr, c_null_ptr) &
529 call neko_error(
'Device memcpy (device-to-host) failed')
533 0, c_null_ptr, c_null_ptr) &
535 call neko_error(
'Device memcpy (device-to-device) failed')
538 call neko_error(
'Device memcpy failed (invalid direction')
543 ptr_h, 0, c_null_ptr, c_null_ptr) &
545 call neko_error(
'Device memcpy (host-to-device) failed')
549 0, c_null_ptr, c_null_ptr) &
551 call neko_error(
'Device memcpy (device-to-host) failed')
555 0, c_null_ptr, c_null_ptr) &
557 call neko_error(
'Device memcpy (device-to-device) failed')
560 call neko_error(
'Device memcpy failed (invalid direction')
568 class(*),
intent(inout),
target :: x(:)
569 type(c_ptr),
intent(inout) :: x_d
570 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
574 htbl_ptr_h%ptr = c_loc(x)
575 type is (
integer(i8))
576 htbl_ptr_h%ptr = c_loc(x)
578 htbl_ptr_h%ptr = c_loc(x)
579 type is (double precision)
580 htbl_ptr_h%ptr = c_loc(x)
593 class(*),
intent(inout),
target :: x(:,:)
594 type(c_ptr),
intent(inout) :: x_d
595 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
599 htbl_ptr_h%ptr = c_loc(x)
600 type is (
integer(i8))
601 htbl_ptr_h%ptr = c_loc(x)
603 htbl_ptr_h%ptr = c_loc(x)
604 type is (double precision)
605 htbl_ptr_h%ptr = c_loc(x)
618 class(*),
intent(inout),
target :: x(:,:,:)
619 type(c_ptr),
intent(inout) :: x_d
620 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
624 htbl_ptr_h%ptr = c_loc(x)
625 type is (
integer(i8))
626 htbl_ptr_h%ptr = c_loc(x)
628 htbl_ptr_h%ptr = c_loc(x)
629 type is (double precision)
630 htbl_ptr_h%ptr = c_loc(x)
643 class(*),
intent(inout),
target :: x(:,:,:,:)
644 type(c_ptr),
intent(inout) :: x_d
645 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
649 htbl_ptr_h%ptr = c_loc(x)
650 type is (
integer(i8))
651 htbl_ptr_h%ptr = c_loc(x)
653 htbl_ptr_h%ptr = c_loc(x)
654 type is (double precision)
655 htbl_ptr_h%ptr = c_loc(x)
668 class(*),
intent(inout),
target :: x(:)
669 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
673 htbl_ptr_h%ptr = c_loc(x)
674 type is (
integer(i8))
675 htbl_ptr_h%ptr = c_loc(x)
677 htbl_ptr_h%ptr = c_loc(x)
678 type is (double precision)
679 htbl_ptr_h%ptr = c_loc(x)
692 class(*),
intent(inout),
target :: x(:,:)
693 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
697 htbl_ptr_h%ptr = c_loc(x)
698 type is (
integer(i8))
699 htbl_ptr_h%ptr = c_loc(x)
701 htbl_ptr_h%ptr = c_loc(x)
702 type is (double precision)
703 htbl_ptr_h%ptr = c_loc(x)
716 class(*),
intent(inout),
target :: x(:,:,:)
717 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
721 htbl_ptr_h%ptr = c_loc(x)
722 type is (
integer(i8))
723 htbl_ptr_h%ptr = c_loc(x)
725 htbl_ptr_h%ptr = c_loc(x)
726 type is (double precision)
727 htbl_ptr_h%ptr = c_loc(x)
740 class(*),
intent(inout),
target :: x(:,:,:,:)
741 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
745 htbl_ptr_h%ptr = c_loc(x)
746 type is (
integer(i8))
747 htbl_ptr_h%ptr = c_loc(x)
749 htbl_ptr_h%ptr = c_loc(x)
750 type is (double precision)
751 htbl_ptr_h%ptr = c_loc(x)
764 integer,
intent(in) :: n
765 class(*),
intent(inout),
target :: x(:)
766 type(c_ptr),
intent(inout) :: x_d
767 integer(c_size_t) :: s
769 if (c_associated(x_d))
then
770 call neko_error(
'Device pointer already associated')
775 s = n * int(4, c_size_t)
776 type is (
integer(i8))
777 s = n * int(8, c_size_t)
779 s = n * int(4, c_size_t)
780 type is (double precision)
781 s = n * int(8, c_size_t)
793 integer,
intent(in) :: n
794 class(*),
intent(inout),
target :: x(:,:)
795 type(c_ptr),
intent(inout) :: x_d
796 integer(c_size_t) :: s
798 if (c_associated(x_d))
then
799 call neko_error(
'Device pointer already associated')
804 s = n * int(4, c_size_t)
805 type is (
integer(i8))
806 s = n * int(8, c_size_t)
808 s = n * int(4, c_size_t)
809 type is (double precision)
810 s = n * int(8, c_size_t)
822 integer,
intent(in) :: n
823 class(*),
intent(inout),
target :: x(:,:,:)
824 type(c_ptr),
intent(inout) :: x_d
825 integer(c_size_t) :: s
827 if (c_associated(x_d))
then
828 call neko_error(
'Device pointer already associated')
833 s = n * int(4, c_size_t)
834 type is (
integer(i8))
835 s = n * int(8, c_size_t)
837 s = n * int(4, c_size_t)
838 type is (double precision)
839 s = n * int(8, c_size_t)
851 integer,
intent(in) :: n
852 class(*),
intent(inout),
target :: x(:,:,:,:)
853 type(c_ptr),
intent(inout) :: x_d
854 integer(c_size_t) :: s
856 if (c_associated(x_d))
then
857 call neko_error(
'Device pointer already associated')
862 s = n * int(4, c_size_t)
863 type is (
integer(i8))
864 s = n * int(8, c_size_t)
866 s = n * int(4, c_size_t)
867 type is (double precision)
868 s = n * int(8, c_size_t)
880 class(*),
intent(inout),
target :: x(:)
881 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
886 htbl_ptr_h%ptr = c_loc(x)
887 type is (
integer(
i8))
888 htbl_ptr_h%ptr = c_loc(x)
890 htbl_ptr_h%ptr = c_loc(x)
891 type is (double precision)
892 htbl_ptr_h%ptr = c_loc(x)
907 class(*),
intent(inout),
target :: x(:,:)
908 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
913 htbl_ptr_h%ptr = c_loc(x)
914 type is (
integer(
i8))
915 htbl_ptr_h%ptr = c_loc(x)
917 htbl_ptr_h%ptr = c_loc(x)
918 type is (double precision)
919 htbl_ptr_h%ptr = c_loc(x)
934 class(*),
intent(inout),
target :: x(:,:,:)
935 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
940 htbl_ptr_h%ptr = c_loc(x)
941 type is (
integer(
i8))
942 htbl_ptr_h%ptr = c_loc(x)
944 htbl_ptr_h%ptr = c_loc(x)
945 type is (double precision)
946 htbl_ptr_h%ptr = c_loc(x)
961 class(*),
intent(inout),
target :: x(:,:,:,:)
962 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
967 htbl_ptr_h%ptr = c_loc(x)
968 type is (
integer(
i8))
969 htbl_ptr_h%ptr = c_loc(x)
971 htbl_ptr_h%ptr = c_loc(x)
972 type is (double precision)
973 htbl_ptr_h%ptr = c_loc(x)
988 class(*),
intent(in),
target :: x(:)
989 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
996 htbl_ptr_h%ptr = c_loc(x)
997 type is (
integer(
i8))
998 htbl_ptr_h%ptr = c_loc(x)
1000 htbl_ptr_h%ptr = c_loc(x)
1001 type is (double precision)
1002 htbl_ptr_h%ptr = c_loc(x)
1010 call neko_error(
'Array not associated with device')
1016 class(*),
intent(in),
target :: x(:,:)
1017 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1024 htbl_ptr_h%ptr = c_loc(x)
1025 type is (
integer(
i8))
1026 htbl_ptr_h%ptr = c_loc(x)
1028 htbl_ptr_h%ptr = c_loc(x)
1029 type is (double precision)
1030 htbl_ptr_h%ptr = c_loc(x)
1038 call neko_error(
'Array not associated with device')
1044 class(*),
intent(in),
target :: x(:,:,:)
1045 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1052 htbl_ptr_h%ptr = c_loc(x)
1053 type is (
integer(
i8))
1054 htbl_ptr_h%ptr = c_loc(x)
1056 htbl_ptr_h%ptr = c_loc(x)
1057 type is (double precision)
1058 htbl_ptr_h%ptr = c_loc(x)
1066 call neko_error(
'Array not associated with device')
1072 class(*),
intent(in),
target :: x(:,:,:,:)
1073 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1080 htbl_ptr_h%ptr = c_loc(x)
1081 type is (
integer(
i8))
1082 htbl_ptr_h%ptr = c_loc(x)
1084 htbl_ptr_h%ptr = c_loc(x)
1085 type is (double precision)
1086 htbl_ptr_h%ptr = c_loc(x)
1094 call neko_error(
'Array not associated with device')
1117 type(c_ptr),
intent(in) :: stream
1135 type(c_ptr),
intent(inout) :: stream
1136 integer,
optional :: flags
1139 if (
present(flags))
then
1141 call neko_error(
'Error during stream create (w. flags)')
1145 call neko_error(
'Error during stream create')
1149 if (
present(flags))
then
1151 call neko_error(
'Error during stream create (w. flags)')
1155 call neko_error(
'Error during stream create')
1161 call neko_error(
'Error during stream create')
1168 type(c_ptr),
intent(inout) :: stream
1169 integer,
intent(in) :: flags, prio
1172 call neko_error(
'Error during stream create (w. priority)')
1176 call neko_error(
'Error during stream create (w. priority)')
1185 type(c_ptr),
intent(inout) :: stream
1188 call neko_error(
'Error during stream destroy')
1192 call neko_error(
'Error during stream destroy')
1196 call neko_error(
'Error during stream destroy')
1203 type(c_ptr),
intent(in) :: stream
1204 type(c_ptr),
target,
intent(in) :: event
1244 type(c_ptr),
intent(inout) :: event
1245 integer,
optional :: flags
1248 if (
present(flags))
then
1250 call neko_error(
'Error during event create (w. flags)')
1258 if (
present(flags))
then
1260 call neko_error(
'Error during event create (w. flags)')
1274 type(c_ptr),
intent(inout) :: event
1277 call neko_error(
'Error during event destroy')
1281 call neko_error(
'Error during event destroy')
1290 type(c_ptr),
target,
intent(in) :: event
1291 type(c_ptr),
intent(in) :: stream
1309 type(c_ptr),
target,
intent(in) :: event
1319 if (c_associated(event))
then
Associate a Fortran array to a (allocated) device pointer.
Check if a Fortran array is assoicated with a device pointer.
Deassociate a Fortran array from a device pointer.
Return the device pointer for an associated Fortran array.
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
Synchronize a device or stream.
subroutine cuda_device_name(name)
subroutine cuda_finalize(glb_cmd_queue, aux_cmd_queue)
@ cudamemcpydevicetodevice
integer function cuda_device_count()
Return the number of avaialble CUDA devices.
subroutine cuda_init(glb_cmd_queue, aux_cmd_queue, strm_high_prio, strm_low_prio)
Device abstraction, common interface for various accelerators.
subroutine, public device_event_record(event, stream)
Record a device event.
subroutine, public device_event_sync(event)
Synchronize an event.
subroutine device_associate_r2(x, x_d)
Associate a Fortran rank 2 array to a (allocated) device pointer.
subroutine, public device_finalize
integer, parameter, public device_to_device
type(c_ptr) function device_get_ptr_r4(x)
Return the device pointer for an associated Fortran rank 4 array.
type(c_ptr) function device_get_ptr_r1(x)
Return the device pointer for an associated Fortran rank 1 array.
integer, public strm_low_prio
Low priority stream setting.
integer, parameter, public host_to_device
subroutine device_map_r3(x, x_d, n)
Map a Fortran rank 3 array to a device (allocate and associate)
subroutine, private device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
Copy data between host and device.
logical function device_associated_r3(x)
Check if a Fortran rank 3 array is assoicated with a device pointer.
type(htable_cptr_t), private device_addrtbl
Table of host to device address mappings.
subroutine, public device_profiler_stop()
Stop device profiling.
subroutine device_deassociate_r3(x)
Deassociate a Fortran rank 3 array from a device pointer.
subroutine, public device_sync_stream(stream)
Synchronize a device stream.
type(c_ptr) function device_get_ptr_r3(x)
Return the device pointer for an associated Fortran rank 3 array.
subroutine, public device_profiler_start()
Start device profiling.
subroutine device_map_r2(x, x_d, n)
Map a Fortran rank 2 array to a device (allocate and associate)
subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 2 arrays)
subroutine device_map_r4(x, x_d, n)
Map a Fortran rank 4 array to a device (allocate and associate)
subroutine, public device_free(x_d)
Deallocate memory on the device.
integer, parameter, public device_to_host
subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
Copy data between host and device (or device and device) (c-pointers)
subroutine, public device_event_destroy(event)
Destroy a device event.
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
subroutine, public device_stream_create_with_priority(stream, flags, prio)
Create a device stream/command queue with priority.
subroutine, public device_stream_create(stream, flags)
Create a device stream/command queue.
subroutine device_deassociate_r4(x)
Deassociate a Fortran rank 4 array from a device pointer.
subroutine device_sync_device()
Synchronize the device.
subroutine device_associate_r1(x, x_d)
Associate a Fortran rank 1 array to a (allocated) device pointer.
subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 4 arrays)
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
subroutine device_map_r1(x, x_d, n)
Map a Fortran rank 1 array to a device (allocate and associate)
subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 1 arrays)
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
subroutine, public device_event_create(event, flags)
Create a device event queue.
integer function, public device_count()
Return the number of available devices.
subroutine, public device_name(name)
logical function device_associated_r4(x)
Check if a Fortran rank 4 array is assoicated with a device pointer.
logical function device_associated_r2(x)
Check if a Fortran rank 2 array is assoicated with a device pointer.
integer, public strm_high_prio
High priority stream setting.
type(c_ptr), bind(C), public aux_cmd_queue
Aux command queue.
type(c_ptr) function device_get_ptr_r2(x)
Return the device pointer for an associated Fortran rank 2 array.
subroutine device_associate_r4(x, x_d)
Associate a Fortran rank 4 array to a (allocated) device pointer.
subroutine device_deassociate_r1(x)
Deassociate a Fortran rank 1 array from a device pointer.
type(c_ptr), bind(C), public glb_cmd_event
Event for the global command queue.
subroutine device_deassociate_r2(x)
Deassociate a Fortran rank 2 array from a device pointer.
subroutine, public device_init
subroutine device_associate_r3(x, x_d)
Associate a Fortran rank 3 array to a (allocated) device pointer.
logical function device_associated_r1(x)
Check if a Fortran rank 1 array is assoicated with a device pointer.
subroutine, public device_memset(x_d, v, s, sync, strm)
Set memory on the device to a value.
subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 3 arrays)
subroutine, public device_stream_destroy(stream)
Destroy a device stream/command queue.
subroutine hip_device_name(name)
@ hipmemcpydevicetodevice
subroutine hip_init(glb_cmd_queue, aux_cmd_queue, strm_high_prio, strm_low_prio)
subroutine hip_finalize(glb_cmd_queue, aux_cmd_queue)
integer function hip_device_count()
Return the number of available HIP devices.
Implements a hash table ADT.
integer, parameter neko_bcknd_device
integer, parameter, public i8
Fortran OpenCL interface.
subroutine opencl_device_name(name)
integer function opencl_device_count()
Return the number of OpenCL devices.
subroutine opencl_finalize(glb_cmd_queue, aux_cmd_queue)
subroutine opencl_init(glb_cmd_queue, aux_cmd_queue)
OpenCL JIT program library.
subroutine, public opencl_prgm_lib_release
C pointer based hash table.