43 use,
intrinsic :: iso_c_binding
107 #if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
122 #if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
138 character(len=*),
intent(inout) :: name
151 type(c_ptr),
intent(inout) :: x_d
152 integer(c_size_t) :: s
156 call neko_error(
'Memory allocation on device failed')
160 call neko_error(
'Memory allocation on device failed')
165 call neko_error(
'Memory allocation on device failed')
172 type(c_ptr),
intent(inout) :: x_d
175 call neko_error(
'Memory deallocation on device failed')
179 call neko_error(
'Memory deallocation on device failed')
182 if (clreleasememobject(x_d) .ne.
cl_success)
then
183 call neko_error(
'Memory deallocation on device failed')
191 integer,
intent(in) :: n
192 class(*),
intent(inout),
target :: x(:)
193 type(c_ptr),
intent(inout) :: x_d
194 integer,
intent(in),
value :: dir
196 type(c_ptr),
optional :: strm
197 type(c_ptr) :: ptr_h, copy_stream
198 integer(c_size_t) :: s
200 if (
present(strm))
then
210 type is (
integer(i8))
216 type is (double precision)
229 integer,
intent(in) :: n
230 class(*),
intent(inout),
target :: x(:,:)
231 type(c_ptr),
intent(inout) :: x_d
232 integer,
intent(in),
value :: dir
234 type(c_ptr),
optional :: strm
235 type(c_ptr) :: ptr_h, copy_stream
236 integer(c_size_t) :: s
238 if (
present(strm))
then
248 type is (
integer(i8))
254 type is (double precision)
267 integer,
intent(in) :: n
268 class(*),
intent(inout),
target :: x(:,:,:)
269 type(c_ptr),
intent(inout) :: x_d
270 integer,
intent(in),
value :: dir
272 type(c_ptr),
optional :: strm
273 type(c_ptr) :: ptr_h, copy_stream
274 integer(c_size_t) :: s
276 if (
present(strm))
then
286 type is (
integer(i8))
292 type is (double precision)
305 integer,
intent(in) :: n
306 class(*),
intent(inout),
target :: x(:,:,:,:)
307 type(c_ptr),
intent(inout) :: x_d
308 integer,
intent(in),
value :: dir
310 type(c_ptr),
optional :: strm
311 type(c_ptr) :: ptr_h, copy_stream
312 integer(c_size_t) :: s
314 if (
present(strm))
then
324 type is (
integer(i8))
330 type is (double precision)
345 type(c_ptr),
intent(inout) :: dst
346 type(c_ptr),
intent(inout) :: src
347 integer(c_size_t),
intent(in) :: s
348 integer,
intent(in),
value :: dir
349 logical,
optional :: sync
350 type(c_ptr),
optional :: strm
351 type(c_ptr) :: copy_stream
352 logical :: sync_device
354 if (
present(sync))
then
357 sync_device = .false.
360 if (
present(strm))
then
374 type(c_ptr),
intent(inout) :: ptr_h
375 type(c_ptr),
intent(inout) :: x_d
376 integer(c_size_t),
intent(in) :: s
377 integer,
intent(in),
value :: dir
378 logical,
intent(in) :: sync_device
379 type(c_ptr),
intent(inout) :: stream
382 if (hipmemcpyasync(x_d, ptr_h, s, &
384 call neko_error(
'Device memcpy async (host-to-device) failed')
387 if (hipmemcpyasync(ptr_h, x_d, s, &
389 call neko_error(
'Device memcpy async (device-to-host) failed')
392 if (hipmemcpyasync(ptr_h, x_d, s, &
394 call neko_error(
'Device memcpy async (device-to-device) failed')
397 call neko_error(
'Device memcpy failed (invalid direction')
399 if (sync_device)
then
404 if (cudamemcpyasync(x_d, ptr_h, s, &
406 call neko_error(
'Device memcpy async (host-to-device) failed')
409 if (cudamemcpyasync(ptr_h, x_d, s, &
411 call neko_error(
'Device memcpy async (device-to-host) failed')
414 if (cudamemcpyasync(ptr_h, x_d, s, &
416 call neko_error(
'Device memcpy async (device-to-device) failed')
419 call neko_error(
'Device memcpy failed (invalid direction')
421 if (sync_device)
then
425 if (sync_device)
then
428 0, c_null_ptr, c_null_ptr) .ne.
cl_success)
then
429 call neko_error(
'Device memcpy (host-to-device) failed')
433 0, c_null_ptr, c_null_ptr) .ne.
cl_success)
then
434 call neko_error(
'Device memcpy (device-to-host) failed')
437 if (clenqueuecopybuffer(
glb_cmd_queue, x_d, ptr_h, 0_i8, 0_i8, s, &
438 0, c_null_ptr, c_null_ptr) .ne.
cl_success)
then
439 call neko_error(
'Device memcpy (device-to-device) failed')
442 call neko_error(
'Device memcpy failed (invalid direction')
447 0, c_null_ptr, c_null_ptr) .ne.
cl_success)
then
448 call neko_error(
'Device memcpy (host-to-device) failed')
452 0, c_null_ptr, c_null_ptr) .ne.
cl_success)
then
453 call neko_error(
'Device memcpy (device-to-host) failed')
456 if (clenqueuecopybuffer(
glb_cmd_queue, x_d, ptr_h, 0_i8, 0_i8, s, &
457 0, c_null_ptr, c_null_ptr) .ne.
cl_success)
then
458 call neko_error(
'Device memcpy (device-to-device) failed')
461 call neko_error(
'Device memcpy failed (invalid direction')
469 class(*),
intent(inout),
target :: x(:)
470 type(c_ptr),
intent(inout) :: x_d
471 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
475 htbl_ptr_h%ptr = c_loc(x)
476 type is (
integer(i8))
477 htbl_ptr_h%ptr = c_loc(x)
479 htbl_ptr_h%ptr = c_loc(x)
480 type is (double precision)
481 htbl_ptr_h%ptr = c_loc(x)
494 class(*),
intent(inout),
target :: x(:,:)
495 type(c_ptr),
intent(inout) :: x_d
496 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
500 htbl_ptr_h%ptr = c_loc(x)
501 type is (
integer(i8))
502 htbl_ptr_h%ptr = c_loc(x)
504 htbl_ptr_h%ptr = c_loc(x)
505 type is (double precision)
506 htbl_ptr_h%ptr = c_loc(x)
519 class(*),
intent(inout),
target :: x(:,:,:)
520 type(c_ptr),
intent(inout) :: x_d
521 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
525 htbl_ptr_h%ptr = c_loc(x)
526 type is (
integer(i8))
527 htbl_ptr_h%ptr = c_loc(x)
529 htbl_ptr_h%ptr = c_loc(x)
530 type is (double precision)
531 htbl_ptr_h%ptr = c_loc(x)
544 class(*),
intent(inout),
target :: x(:,:,:,:)
545 type(c_ptr),
intent(inout) :: x_d
546 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
550 htbl_ptr_h%ptr = c_loc(x)
551 type is (
integer(i8))
552 htbl_ptr_h%ptr = c_loc(x)
554 htbl_ptr_h%ptr = c_loc(x)
555 type is (double precision)
556 htbl_ptr_h%ptr = c_loc(x)
569 class(*),
intent(inout),
target :: x(:)
570 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
574 htbl_ptr_h%ptr = c_loc(x)
575 type is (
integer(i8))
576 htbl_ptr_h%ptr = c_loc(x)
578 htbl_ptr_h%ptr = c_loc(x)
579 type is (double precision)
580 htbl_ptr_h%ptr = c_loc(x)
593 class(*),
intent(inout),
target :: x(:,:)
594 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
598 htbl_ptr_h%ptr = c_loc(x)
599 type is (
integer(i8))
600 htbl_ptr_h%ptr = c_loc(x)
602 htbl_ptr_h%ptr = c_loc(x)
603 type is (double precision)
604 htbl_ptr_h%ptr = c_loc(x)
617 class(*),
intent(inout),
target :: x(:,:,:)
618 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
622 htbl_ptr_h%ptr = c_loc(x)
623 type is (
integer(i8))
624 htbl_ptr_h%ptr = c_loc(x)
626 htbl_ptr_h%ptr = c_loc(x)
627 type is (double precision)
628 htbl_ptr_h%ptr = c_loc(x)
641 class(*),
intent(inout),
target :: x(:,:,:,:)
642 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
646 htbl_ptr_h%ptr = c_loc(x)
647 type is (
integer(i8))
648 htbl_ptr_h%ptr = c_loc(x)
650 htbl_ptr_h%ptr = c_loc(x)
651 type is (double precision)
652 htbl_ptr_h%ptr = c_loc(x)
665 integer,
intent(in) :: n
666 class(*),
intent(inout),
target :: x(:)
667 type(c_ptr),
intent(inout) :: x_d
668 integer(c_size_t) :: s
670 if (c_associated(x_d))
then
671 call neko_error(
'Device pointer already associated')
677 type is (
integer(i8))
681 type is (double precision)
694 integer,
intent(in) :: n
695 class(*),
intent(inout),
target :: x(:,:)
696 type(c_ptr),
intent(inout) :: x_d
697 integer(c_size_t) :: s
699 if (c_associated(x_d))
then
700 call neko_error(
'Device pointer already associated')
706 type is (
integer(i8))
710 type is (double precision)
723 integer,
intent(in) :: n
724 class(*),
intent(inout),
target :: x(:,:,:)
725 type(c_ptr),
intent(inout) :: x_d
726 integer(c_size_t) :: s
728 if (c_associated(x_d))
then
729 call neko_error(
'Device pointer already associated')
735 type is (
integer(i8))
739 type is (double precision)
752 integer,
intent(in) :: n
753 class(*),
intent(inout),
target :: x(:,:,:,:)
754 type(c_ptr),
intent(inout) :: x_d
755 integer(c_size_t) :: s
757 if (c_associated(x_d))
then
758 call neko_error(
'Device pointer already associated')
764 type is (
integer(i8))
768 type is (double precision)
781 class(*),
intent(inout),
target :: x(:)
782 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
787 htbl_ptr_h%ptr = c_loc(x)
788 type is (
integer(
i8))
789 htbl_ptr_h%ptr = c_loc(x)
791 htbl_ptr_h%ptr = c_loc(x)
792 type is (double precision)
793 htbl_ptr_h%ptr = c_loc(x)
808 class(*),
intent(inout),
target :: x(:,:)
809 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
814 htbl_ptr_h%ptr = c_loc(x)
815 type is (
integer(
i8))
816 htbl_ptr_h%ptr = c_loc(x)
818 htbl_ptr_h%ptr = c_loc(x)
819 type is (double precision)
820 htbl_ptr_h%ptr = c_loc(x)
835 class(*),
intent(inout),
target :: x(:,:,:)
836 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
841 htbl_ptr_h%ptr = c_loc(x)
842 type is (
integer(
i8))
843 htbl_ptr_h%ptr = c_loc(x)
845 htbl_ptr_h%ptr = c_loc(x)
846 type is (double precision)
847 htbl_ptr_h%ptr = c_loc(x)
862 class(*),
intent(inout),
target :: x(:,:,:,:)
863 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
868 htbl_ptr_h%ptr = c_loc(x)
869 type is (
integer(
i8))
870 htbl_ptr_h%ptr = c_loc(x)
872 htbl_ptr_h%ptr = c_loc(x)
873 type is (double precision)
874 htbl_ptr_h%ptr = c_loc(x)
889 class(*),
intent(in),
target :: x(:)
890 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
897 htbl_ptr_h%ptr = c_loc(x)
898 type is (
integer(
i8))
899 htbl_ptr_h%ptr = c_loc(x)
901 htbl_ptr_h%ptr = c_loc(x)
902 type is (double precision)
903 htbl_ptr_h%ptr = c_loc(x)
911 call neko_error(
'Array not associated with device')
917 class(*),
intent(in),
target :: x(:,:)
918 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
925 htbl_ptr_h%ptr = c_loc(x)
926 type is (
integer(
i8))
927 htbl_ptr_h%ptr = c_loc(x)
929 htbl_ptr_h%ptr = c_loc(x)
930 type is (double precision)
931 htbl_ptr_h%ptr = c_loc(x)
939 call neko_error(
'Array not associated with device')
945 class(*),
intent(in),
target :: x(:,:,:)
946 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
953 htbl_ptr_h%ptr = c_loc(x)
954 type is (
integer(
i8))
955 htbl_ptr_h%ptr = c_loc(x)
957 htbl_ptr_h%ptr = c_loc(x)
958 type is (double precision)
959 htbl_ptr_h%ptr = c_loc(x)
967 call neko_error(
'Array not associated with device')
973 class(*),
intent(in),
target :: x(:,:,:,:)
974 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
981 htbl_ptr_h%ptr = c_loc(x)
982 type is (
integer(
i8))
983 htbl_ptr_h%ptr = c_loc(x)
985 htbl_ptr_h%ptr = c_loc(x)
986 type is (double precision)
987 htbl_ptr_h%ptr = c_loc(x)
995 call neko_error(
'Array not associated with device')
1002 if (hipdevicesynchronize() .ne.
hipsuccess)
then
1006 if (cudadevicesynchronize() .ne.
cudasuccess)
then
1018 type(c_ptr),
intent(in) :: stream
1020 if (hipstreamsynchronize(stream) .ne.
hipsuccess)
then
1024 if (cudastreamsynchronize(stream) .ne.
cudasuccess)
then
1036 type(c_ptr),
intent(inout) :: stream
1037 integer,
optional :: flags
1040 if (
present(flags))
then
1041 if (hipstreamcreatewithflags(stream, flags) .ne.
hipsuccess)
then
1042 call neko_error(
'Error during stream create (w. flags)')
1045 if (hipstreamcreate(stream) .ne.
hipsuccess)
then
1046 call neko_error(
'Error during stream create')
1050 if (
present(flags))
then
1051 if (cudastreamcreatewithflags(stream, flags) .ne.
cudasuccess)
then
1052 call neko_error(
'Error during stream create (w. flags)')
1055 if (cudastreamcreate(stream) .ne.
cudasuccess)
then
1056 call neko_error(
'Error during stream create')
1060 stream = clcreatecommandqueue(glb_ctx, glb_device_id, 0_i8, ierr)
1062 call neko_error(
'Error during stream create')
1069 type(c_ptr),
intent(inout) :: stream
1070 integer,
intent(in) :: flags, prio
1072 if (hipstreamcreatewithpriority(stream, flags, prio) .ne.
hipsuccess)
then
1073 call neko_error(
'Error during stream create (w. priority)')
1076 if (cudastreamcreatewithpriority(stream, flags, prio) .ne.
cudasuccess)
then
1077 call neko_error(
'Error during stream create (w. priority)')
1086 type(c_ptr),
intent(inout) :: stream
1088 if (hipstreamdestroy(stream) .ne.
hipsuccess)
then
1089 call neko_error(
'Error during stream destroy')
1092 if (cudastreamdestroy(stream) .ne.
cudasuccess)
then
1093 call neko_error(
'Error during stream destroy')
1096 if (clreleasecommandqueue(stream) .ne.
cl_success)
then
1097 call neko_error(
'Error during stream destroy')
1104 type(c_ptr),
intent(in) :: stream
1105 type(c_ptr),
intent(in) :: event
1108 if (hipstreamwaitevent(stream, event, flags) .ne.
hipsuccess)
then
1112 if (cudastreamwaitevent(stream, event, flags) .ne.
cudasuccess)
then
1142 type(c_ptr),
intent(inout) :: event
1143 integer,
optional :: flags
1146 if (
present(flags))
then
1147 if (hipeventcreatewithflags(event, flags) .ne.
hipsuccess)
then
1148 call neko_error(
'Error during event create (w. flags)')
1151 if (hipeventcreate(event) .ne.
hipsuccess)
then
1156 if (
present(flags))
then
1157 if (cudaeventcreatewithflags(event, flags) .ne.
cudasuccess)
then
1158 call neko_error(
'Error during event create (w. flags)')
1172 type(c_ptr),
intent(inout) :: event
1174 if (hipeventdestroy(event) .ne.
hipsuccess)
then
1175 call neko_error(
'Error during event destroy')
1178 if (cudaeventdestroy(event) .ne.
cudasuccess)
then
1179 call neko_error(
'Error during event destroy')
1182 if (clreleaseevent(event) .ne.
cl_success)
then
1183 call neko_error(
'Error during event destroy')
1190 type(c_ptr),
intent(in) :: event
1191 type(c_ptr),
intent(in) :: stream
1193 if (hipeventrecord(event, stream) .ne.
hipsuccess)
then
1197 if (cudaeventrecord(event, stream) .ne.
cudasuccess)
then
1201 if (clenqueuemarkerwithwaitlist(stream, 0, c_null_ptr, event) .ne.
cl_success)
then
1209 type(c_ptr),
intent(in) :: event
1211 if (hipeventsynchronize(event) .ne.
hipsuccess)
then
1215 if (cudaeventsynchronize(event) .ne.
cudasuccess)
then
1219 if (clwaitforevents(1, event) .ne.
cl_success)
then
Associate a Fortran array to a (allocated) device pointer.
Check if a Fortran array is assoicated with a device pointer.
Deassociate a Fortran array from a device pointer.
Return the device pointer for an associated Fortran array.
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
Synchronize a device or stream.
subroutine cuda_device_name(name)
@ cudamemcpydevicetodevice
type(c_ptr), bind(C) glb_cmd_queue
Global HIP command queue.
Device abstraction, common interface for various accelerators.
subroutine, public device_event_record(event, stream)
Record a device event.
subroutine, public device_event_sync(event)
Synchronize an event.
subroutine device_associate_r2(x, x_d)
Associate a Fortran rank 2 array to a (allocated) device pointer.
subroutine, public device_finalize
integer, parameter, public device_to_device
type(c_ptr) function device_get_ptr_r4(x)
Return the device pointer for an associated Fortran rank 4 array.
type(c_ptr) function device_get_ptr_r1(x)
Return the device pointer for an associated Fortran rank 1 array.
integer, parameter, public host_to_device
subroutine device_map_r3(x, x_d, n)
Map a Fortran rank 3 array to a device (allocate and associate)
subroutine, private device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
Copy data between host and device.
logical function device_associated_r3(x)
Check if a Fortran rank 3 array is assoicated with a device pointer.
type(htable_cptr_t), private device_addrtbl
Table of host to device address mappings.
subroutine, public device_profiler_stop()
Stop device profiling.
subroutine device_deassociate_r3(x)
Deassociate a Fortran rank 3 array from a device pointer.
subroutine, public device_sync_stream(stream)
Synchronize a device stream.
type(c_ptr) function device_get_ptr_r3(x)
Return the device pointer for an associated Fortran rank 3 array.
subroutine, public device_profiler_start()
Start device profiling.
subroutine device_map_r2(x, x_d, n)
Map a Fortran rank 2 array to a device (allocate and associate)
subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 2 arrays)
subroutine device_map_r4(x, x_d, n)
Map a Fortran rank 4 array to a device (allocate and associate)
subroutine, public device_free(x_d)
Deallocate memory on the device.
integer, parameter, public device_to_host
subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
Copy data between host and device (or device and device) (c-pointers)
subroutine, public device_event_destroy(event)
Destroy a device event.
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
subroutine, public device_stream_create(stream, flags)
Create a device stream/command queue.
subroutine device_deassociate_r4(x)
Deassociate a Fortran rank 4 array from a device pointer.
subroutine device_sync_device()
Synchronize the device.
subroutine device_associate_r1(x, x_d)
Associate a Fortran rank 1 array to a (allocated) device pointer.
subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 4 arrays)
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
subroutine device_map_r1(x, x_d, n)
Map a Fortran rank 1 array to a device (allocate and associate)
subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 1 arrays)
subroutine device_stream_create_with_priority(stream, flags, prio)
Create a device stream/command queue with priority.
subroutine, public device_event_create(event, flags)
Create a device event queue.
subroutine, public device_name(name)
logical function device_associated_r4(x)
Check if a Fortran rank 4 array is assoicated with a device pointer.
logical function device_associated_r2(x)
Check if a Fortran rank 2 array is assoicated with a device pointer.
type(c_ptr) function device_get_ptr_r2(x)
Return the device pointer for an associated Fortran rank 2 array.
subroutine device_associate_r4(x, x_d)
Associate a Fortran rank 4 array to a (allocated) device pointer.
subroutine device_deassociate_r1(x)
Deassociate a Fortran rank 1 array from a device pointer.
subroutine device_deassociate_r2(x)
Deassociate a Fortran rank 2 array from a device pointer.
subroutine, public device_init
subroutine device_associate_r3(x, x_d)
Associate a Fortran rank 3 array to a (allocated) device pointer.
logical function device_associated_r1(x)
Check if a Fortran rank 1 array is assoicated with a device pointer.
subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 3 arrays)
subroutine, public device_stream_destroy(stream)
Destroy a device stream/command queue.
subroutine hip_device_name(name)
@ hipmemcpydevicetodevice
Implements a hash table ADT.
integer, parameter, public i8
Fortran OpenCL interface.
subroutine opencl_device_name(name)
subroutine opencl_finalize
OpenCL JIT program library.
subroutine opencl_prgm_lib_release
C pointer based hash table.