43 use,
intrinsic :: iso_c_binding
107 #if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
122 #if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
138 character(len=*),
intent(inout) :: name
164 type(c_ptr),
intent(inout) :: x_d
165 integer(c_size_t) :: s
169 call neko_error(
'Memory allocation on device failed')
173 call neko_error(
'Memory allocation on device failed')
178 call neko_error(
'Memory allocation on device failed')
185 type(c_ptr),
intent(inout) :: x_d
188 call neko_error(
'Memory deallocation on device failed')
192 call neko_error(
'Memory deallocation on device failed')
195 if (clreleasememobject(x_d) .ne.
cl_success)
then
196 call neko_error(
'Memory deallocation on device failed')
204 integer,
intent(in) :: n
205 class(*),
intent(inout),
target :: x(:)
206 type(c_ptr),
intent(inout) :: x_d
207 integer,
intent(in),
value :: dir
209 type(c_ptr),
optional :: strm
210 type(c_ptr) :: ptr_h, copy_stream
211 integer(c_size_t) :: s
213 if (
present(strm))
then
223 type is (
integer(i8))
229 type is (double precision)
242 integer,
intent(in) :: n
243 class(*),
intent(inout),
target :: x(:,:)
244 type(c_ptr),
intent(inout) :: x_d
245 integer,
intent(in),
value :: dir
247 type(c_ptr),
optional :: strm
248 type(c_ptr) :: ptr_h, copy_stream
249 integer(c_size_t) :: s
251 if (
present(strm))
then
261 type is (
integer(i8))
267 type is (double precision)
280 integer,
intent(in) :: n
281 class(*),
intent(inout),
target :: x(:,:,:)
282 type(c_ptr),
intent(inout) :: x_d
283 integer,
intent(in),
value :: dir
285 type(c_ptr),
optional :: strm
286 type(c_ptr) :: ptr_h, copy_stream
287 integer(c_size_t) :: s
289 if (
present(strm))
then
299 type is (
integer(i8))
305 type is (double precision)
318 integer,
intent(in) :: n
319 class(*),
intent(inout),
target :: x(:,:,:,:)
320 type(c_ptr),
intent(inout) :: x_d
321 integer,
intent(in),
value :: dir
323 type(c_ptr),
optional :: strm
324 type(c_ptr) :: ptr_h, copy_stream
325 integer(c_size_t) :: s
327 if (
present(strm))
then
337 type is (
integer(i8))
343 type is (double precision)
358 type(c_ptr),
intent(inout) :: dst
359 type(c_ptr),
intent(inout) :: src
360 integer(c_size_t),
intent(in) :: s
361 integer,
intent(in),
value :: dir
362 logical,
optional :: sync
363 type(c_ptr),
optional :: strm
364 type(c_ptr) :: copy_stream
365 logical :: sync_device
367 if (
present(sync))
then
370 sync_device = .false.
373 if (
present(strm))
then
387 type(c_ptr),
intent(inout) :: ptr_h
388 type(c_ptr),
intent(inout) :: x_d
389 integer(c_size_t),
intent(in) :: s
390 integer,
intent(in),
value :: dir
391 logical,
intent(in) :: sync_device
392 type(c_ptr),
intent(inout) :: stream
395 if (hipmemcpyasync(x_d, ptr_h, s, &
397 call neko_error(
'Device memcpy async (host-to-device) failed')
400 if (hipmemcpyasync(ptr_h, x_d, s, &
402 call neko_error(
'Device memcpy async (device-to-host) failed')
407 call neko_error(
'Device memcpy async (device-to-device) failed')
410 call neko_error(
'Device memcpy failed (invalid direction')
412 if (sync_device)
then
419 call neko_error(
'Device memcpy async (host-to-device) failed')
424 call neko_error(
'Device memcpy async (device-to-host) failed')
429 call neko_error(
'Device memcpy async (device-to-device) failed')
432 call neko_error(
'Device memcpy failed (invalid direction')
434 if (sync_device)
then
438 if (sync_device)
then
440 if (clenqueuewritebuffer(stream, x_d,
cl_true, 0_i8, s, &
441 ptr_h, 0, c_null_ptr, c_null_ptr) &
443 call neko_error(
'Device memcpy (host-to-device) failed')
446 if (clenqueuereadbuffer(stream, x_d,
cl_true, 0_i8, s, ptr_h, &
447 0, c_null_ptr, c_null_ptr) &
449 call neko_error(
'Device memcpy (device-to-host) failed')
452 if (clenqueuecopybuffer(stream, x_d, ptr_h, 0_i8, 0_i8, s, &
453 0, c_null_ptr, c_null_ptr) &
455 call neko_error(
'Device memcpy (device-to-device) failed')
458 call neko_error(
'Device memcpy failed (invalid direction')
462 if (clenqueuewritebuffer(stream, x_d,
cl_false, 0_i8, s, &
463 ptr_h, 0, c_null_ptr, c_null_ptr) &
465 call neko_error(
'Device memcpy (host-to-device) failed')
468 if (clenqueuereadbuffer(stream, x_d,
cl_false, 0_i8, s, ptr_h,&
469 0, c_null_ptr, c_null_ptr) &
471 call neko_error(
'Device memcpy (device-to-host) failed')
474 if (clenqueuecopybuffer(stream, x_d, ptr_h, 0_i8, 0_i8, s, &
475 0, c_null_ptr, c_null_ptr) &
477 call neko_error(
'Device memcpy (device-to-device) failed')
480 call neko_error(
'Device memcpy failed (invalid direction')
488 class(*),
intent(inout),
target :: x(:)
489 type(c_ptr),
intent(inout) :: x_d
490 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
494 htbl_ptr_h%ptr = c_loc(x)
495 type is (
integer(i8))
496 htbl_ptr_h%ptr = c_loc(x)
498 htbl_ptr_h%ptr = c_loc(x)
499 type is (double precision)
500 htbl_ptr_h%ptr = c_loc(x)
513 class(*),
intent(inout),
target :: x(:,:)
514 type(c_ptr),
intent(inout) :: x_d
515 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
519 htbl_ptr_h%ptr = c_loc(x)
520 type is (
integer(i8))
521 htbl_ptr_h%ptr = c_loc(x)
523 htbl_ptr_h%ptr = c_loc(x)
524 type is (double precision)
525 htbl_ptr_h%ptr = c_loc(x)
538 class(*),
intent(inout),
target :: x(:,:,:)
539 type(c_ptr),
intent(inout) :: x_d
540 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
544 htbl_ptr_h%ptr = c_loc(x)
545 type is (
integer(i8))
546 htbl_ptr_h%ptr = c_loc(x)
548 htbl_ptr_h%ptr = c_loc(x)
549 type is (double precision)
550 htbl_ptr_h%ptr = c_loc(x)
563 class(*),
intent(inout),
target :: x(:,:,:,:)
564 type(c_ptr),
intent(inout) :: x_d
565 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
569 htbl_ptr_h%ptr = c_loc(x)
570 type is (
integer(i8))
571 htbl_ptr_h%ptr = c_loc(x)
573 htbl_ptr_h%ptr = c_loc(x)
574 type is (double precision)
575 htbl_ptr_h%ptr = c_loc(x)
588 class(*),
intent(inout),
target :: x(:)
589 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
593 htbl_ptr_h%ptr = c_loc(x)
594 type is (
integer(i8))
595 htbl_ptr_h%ptr = c_loc(x)
597 htbl_ptr_h%ptr = c_loc(x)
598 type is (double precision)
599 htbl_ptr_h%ptr = c_loc(x)
612 class(*),
intent(inout),
target :: x(:,:)
613 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
617 htbl_ptr_h%ptr = c_loc(x)
618 type is (
integer(i8))
619 htbl_ptr_h%ptr = c_loc(x)
621 htbl_ptr_h%ptr = c_loc(x)
622 type is (double precision)
623 htbl_ptr_h%ptr = c_loc(x)
636 class(*),
intent(inout),
target :: x(:,:,:)
637 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
641 htbl_ptr_h%ptr = c_loc(x)
642 type is (
integer(i8))
643 htbl_ptr_h%ptr = c_loc(x)
645 htbl_ptr_h%ptr = c_loc(x)
646 type is (double precision)
647 htbl_ptr_h%ptr = c_loc(x)
660 class(*),
intent(inout),
target :: x(:,:,:,:)
661 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
665 htbl_ptr_h%ptr = c_loc(x)
666 type is (
integer(i8))
667 htbl_ptr_h%ptr = c_loc(x)
669 htbl_ptr_h%ptr = c_loc(x)
670 type is (double precision)
671 htbl_ptr_h%ptr = c_loc(x)
684 integer,
intent(in) :: n
685 class(*),
intent(inout),
target :: x(:)
686 type(c_ptr),
intent(inout) :: x_d
687 integer(c_size_t) :: s
689 if (c_associated(x_d))
then
690 call neko_error(
'Device pointer already associated')
696 type is (
integer(i8))
700 type is (double precision)
713 integer,
intent(in) :: n
714 class(*),
intent(inout),
target :: x(:,:)
715 type(c_ptr),
intent(inout) :: x_d
716 integer(c_size_t) :: s
718 if (c_associated(x_d))
then
719 call neko_error(
'Device pointer already associated')
725 type is (
integer(i8))
729 type is (double precision)
742 integer,
intent(in) :: n
743 class(*),
intent(inout),
target :: x(:,:,:)
744 type(c_ptr),
intent(inout) :: x_d
745 integer(c_size_t) :: s
747 if (c_associated(x_d))
then
748 call neko_error(
'Device pointer already associated')
754 type is (
integer(i8))
758 type is (double precision)
771 integer,
intent(in) :: n
772 class(*),
intent(inout),
target :: x(:,:,:,:)
773 type(c_ptr),
intent(inout) :: x_d
774 integer(c_size_t) :: s
776 if (c_associated(x_d))
then
777 call neko_error(
'Device pointer already associated')
783 type is (
integer(i8))
787 type is (double precision)
800 class(*),
intent(inout),
target :: x(:)
801 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
806 htbl_ptr_h%ptr = c_loc(x)
807 type is (
integer(
i8))
808 htbl_ptr_h%ptr = c_loc(x)
810 htbl_ptr_h%ptr = c_loc(x)
811 type is (double precision)
812 htbl_ptr_h%ptr = c_loc(x)
827 class(*),
intent(inout),
target :: x(:,:)
828 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
833 htbl_ptr_h%ptr = c_loc(x)
834 type is (
integer(
i8))
835 htbl_ptr_h%ptr = c_loc(x)
837 htbl_ptr_h%ptr = c_loc(x)
838 type is (double precision)
839 htbl_ptr_h%ptr = c_loc(x)
854 class(*),
intent(inout),
target :: x(:,:,:)
855 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
860 htbl_ptr_h%ptr = c_loc(x)
861 type is (
integer(
i8))
862 htbl_ptr_h%ptr = c_loc(x)
864 htbl_ptr_h%ptr = c_loc(x)
865 type is (double precision)
866 htbl_ptr_h%ptr = c_loc(x)
881 class(*),
intent(inout),
target :: x(:,:,:,:)
882 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
887 htbl_ptr_h%ptr = c_loc(x)
888 type is (
integer(
i8))
889 htbl_ptr_h%ptr = c_loc(x)
891 htbl_ptr_h%ptr = c_loc(x)
892 type is (double precision)
893 htbl_ptr_h%ptr = c_loc(x)
908 class(*),
intent(in),
target :: x(:)
909 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
916 htbl_ptr_h%ptr = c_loc(x)
917 type is (
integer(
i8))
918 htbl_ptr_h%ptr = c_loc(x)
920 htbl_ptr_h%ptr = c_loc(x)
921 type is (double precision)
922 htbl_ptr_h%ptr = c_loc(x)
930 call neko_error(
'Array not associated with device')
936 class(*),
intent(in),
target :: x(:,:)
937 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
944 htbl_ptr_h%ptr = c_loc(x)
945 type is (
integer(
i8))
946 htbl_ptr_h%ptr = c_loc(x)
948 htbl_ptr_h%ptr = c_loc(x)
949 type is (double precision)
950 htbl_ptr_h%ptr = c_loc(x)
958 call neko_error(
'Array not associated with device')
964 class(*),
intent(in),
target :: x(:,:,:)
965 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
972 htbl_ptr_h%ptr = c_loc(x)
973 type is (
integer(
i8))
974 htbl_ptr_h%ptr = c_loc(x)
976 htbl_ptr_h%ptr = c_loc(x)
977 type is (double precision)
978 htbl_ptr_h%ptr = c_loc(x)
986 call neko_error(
'Array not associated with device')
992 class(*),
intent(in),
target :: x(:,:,:,:)
993 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1000 htbl_ptr_h%ptr = c_loc(x)
1001 type is (
integer(
i8))
1002 htbl_ptr_h%ptr = c_loc(x)
1004 htbl_ptr_h%ptr = c_loc(x)
1005 type is (double precision)
1006 htbl_ptr_h%ptr = c_loc(x)
1014 call neko_error(
'Array not associated with device')
1021 if (hipdevicesynchronize() .ne.
hipsuccess)
then
1025 if (cudadevicesynchronize() .ne.
cudasuccess)
then
1037 type(c_ptr),
intent(in) :: stream
1039 if (hipstreamsynchronize(stream) .ne.
hipsuccess)
then
1043 if (cudastreamsynchronize(stream) .ne.
cudasuccess)
then
1055 type(c_ptr),
intent(inout) :: stream
1056 integer,
optional :: flags
1059 if (
present(flags))
then
1060 if (hipstreamcreatewithflags(stream, flags) .ne.
hipsuccess)
then
1061 call neko_error(
'Error during stream create (w. flags)')
1064 if (hipstreamcreate(stream) .ne.
hipsuccess)
then
1065 call neko_error(
'Error during stream create')
1069 if (
present(flags))
then
1070 if (cudastreamcreatewithflags(stream, flags) .ne.
cudasuccess)
then
1071 call neko_error(
'Error during stream create (w. flags)')
1074 if (cudastreamcreate(stream) .ne.
cudasuccess)
then
1075 call neko_error(
'Error during stream create')
1079 stream = clcreatecommandqueue(glb_ctx, glb_device_id, 0_i8, ierr)
1081 call neko_error(
'Error during stream create')
1088 type(c_ptr),
intent(inout) :: stream
1089 integer,
intent(in) :: flags, prio
1091 if (hipstreamcreatewithpriority(stream, flags, prio) .ne.
hipsuccess)
then
1092 call neko_error(
'Error during stream create (w. priority)')
1095 if (cudastreamcreatewithpriority(stream, flags, prio) .ne.
cudasuccess)
then
1096 call neko_error(
'Error during stream create (w. priority)')
1105 type(c_ptr),
intent(inout) :: stream
1107 if (hipstreamdestroy(stream) .ne.
hipsuccess)
then
1108 call neko_error(
'Error during stream destroy')
1111 if (cudastreamdestroy(stream) .ne.
cudasuccess)
then
1112 call neko_error(
'Error during stream destroy')
1115 if (clreleasecommandqueue(stream) .ne.
cl_success)
then
1116 call neko_error(
'Error during stream destroy')
1123 type(c_ptr),
intent(in) :: stream
1124 type(c_ptr),
target,
intent(in) :: event
1127 if (hipstreamwaitevent(stream, event, flags) .ne.
hipsuccess)
then
1131 if (cudastreamwaitevent(stream, event, flags) .ne.
cudasuccess)
then
1135 if (clenqueuebarrier(stream) .ne.
cl_success)
then
1138 if (clenqueuewaitforevents(stream, 1, c_loc(event)) .ne.
cl_success)
then
1164 type(c_ptr),
intent(inout) :: event
1165 integer,
optional :: flags
1168 if (
present(flags))
then
1169 if (hipeventcreatewithflags(event, flags) .ne.
hipsuccess)
then
1170 call neko_error(
'Error during event create (w. flags)')
1173 if (hipeventcreate(event) .ne.
hipsuccess)
then
1178 if (
present(flags))
then
1179 if (cudaeventcreatewithflags(event, flags) .ne.
cudasuccess)
then
1180 call neko_error(
'Error during event create (w. flags)')
1194 type(c_ptr),
intent(inout) :: event
1196 if (hipeventdestroy(event) .ne.
hipsuccess)
then
1197 call neko_error(
'Error during event destroy')
1200 if (cudaeventdestroy(event) .ne.
cudasuccess)
then
1201 call neko_error(
'Error during event destroy')
1210 type(c_ptr),
target,
intent(in) :: event
1211 type(c_ptr),
intent(in) :: stream
1213 if (hipeventrecord(event, stream) .ne.
hipsuccess)
then
1217 if (cudaeventrecord(event, stream) .ne.
cudasuccess)
then
1221 if (clenqueuemarker(stream, c_loc(event)) .ne.
cl_success)
then
1229 type(c_ptr),
target,
intent(in) :: event
1231 if (hipeventsynchronize(event) .ne.
hipsuccess)
then
1235 if (cudaeventsynchronize(event) .ne.
cudasuccess)
then
1239 if (c_associated(event))
then
1240 if (clwaitforevents(1, c_loc(event)) .ne.
cl_success)
then
Associate a Fortran array to a (allocated) device pointer.
Check if a Fortran array is assoicated with a device pointer.
Deassociate a Fortran array from a device pointer.
Return the device pointer for an associated Fortran array.
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
Synchronize a device or stream.
subroutine cuda_device_name(name)
@ cudamemcpydevicetodevice
integer function cuda_device_count()
Return the number of avaialble CUDA devices.
type(c_ptr), bind(C) glb_cmd_queue
Global HIP command queue.
Device abstraction, common interface for various accelerators.
subroutine, public device_event_record(event, stream)
Record a device event.
subroutine, public device_event_sync(event)
Synchronize an event.
subroutine device_associate_r2(x, x_d)
Associate a Fortran rank 2 array to a (allocated) device pointer.
subroutine, public device_finalize
integer, parameter, public device_to_device
type(c_ptr) function device_get_ptr_r4(x)
Return the device pointer for an associated Fortran rank 4 array.
type(c_ptr) function device_get_ptr_r1(x)
Return the device pointer for an associated Fortran rank 1 array.
integer, parameter, public host_to_device
subroutine device_map_r3(x, x_d, n)
Map a Fortran rank 3 array to a device (allocate and associate)
subroutine, private device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
Copy data between host and device.
logical function device_associated_r3(x)
Check if a Fortran rank 3 array is assoicated with a device pointer.
type(htable_cptr_t), private device_addrtbl
Table of host to device address mappings.
subroutine, public device_profiler_stop()
Stop device profiling.
subroutine device_deassociate_r3(x)
Deassociate a Fortran rank 3 array from a device pointer.
subroutine, public device_sync_stream(stream)
Synchronize a device stream.
type(c_ptr) function device_get_ptr_r3(x)
Return the device pointer for an associated Fortran rank 3 array.
subroutine, public device_profiler_start()
Start device profiling.
subroutine device_map_r2(x, x_d, n)
Map a Fortran rank 2 array to a device (allocate and associate)
subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 2 arrays)
subroutine device_map_r4(x, x_d, n)
Map a Fortran rank 4 array to a device (allocate and associate)
subroutine, public device_free(x_d)
Deallocate memory on the device.
integer, parameter, public device_to_host
subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
Copy data between host and device (or device and device) (c-pointers)
subroutine, public device_event_destroy(event)
Destroy a device event.
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
subroutine, public device_stream_create(stream, flags)
Create a device stream/command queue.
subroutine device_deassociate_r4(x)
Deassociate a Fortran rank 4 array from a device pointer.
subroutine device_sync_device()
Synchronize the device.
subroutine device_associate_r1(x, x_d)
Associate a Fortran rank 1 array to a (allocated) device pointer.
subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 4 arrays)
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
subroutine device_map_r1(x, x_d, n)
Map a Fortran rank 1 array to a device (allocate and associate)
subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 1 arrays)
subroutine device_stream_create_with_priority(stream, flags, prio)
Create a device stream/command queue with priority.
subroutine, public device_event_create(event, flags)
Create a device event queue.
subroutine, public device_name(name)
logical function device_associated_r4(x)
Check if a Fortran rank 4 array is assoicated with a device pointer.
integer function device_count()
Return the number of available devices.
logical function device_associated_r2(x)
Check if a Fortran rank 2 array is assoicated with a device pointer.
type(c_ptr) function device_get_ptr_r2(x)
Return the device pointer for an associated Fortran rank 2 array.
subroutine device_associate_r4(x, x_d)
Associate a Fortran rank 4 array to a (allocated) device pointer.
subroutine device_deassociate_r1(x)
Deassociate a Fortran rank 1 array from a device pointer.
subroutine device_deassociate_r2(x)
Deassociate a Fortran rank 2 array from a device pointer.
subroutine, public device_init
subroutine device_associate_r3(x, x_d)
Associate a Fortran rank 3 array to a (allocated) device pointer.
logical function device_associated_r1(x)
Check if a Fortran rank 1 array is assoicated with a device pointer.
subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 3 arrays)
subroutine, public device_stream_destroy(stream)
Destroy a device stream/command queue.
subroutine hip_device_name(name)
@ hipmemcpydevicetodevice
integer function hip_device_count()
Return the number of available HIP devices.
Implements a hash table ADT.
integer, parameter, public i8
Fortran OpenCL interface.
subroutine opencl_device_name(name)
integer function opencl_device_count()
Return the number of OpenCL devices.
subroutine opencl_finalize
OpenCL JIT program library.
subroutine opencl_prgm_lib_release
C pointer based hash table.