44 use,
intrinsic :: iso_c_binding
135#if defined(HAVE_HIP) || defined(HAVE_CUDA) || \
136 defined(have_opencl) || defined(have_metal)
154 call neko_error(
'Only one device is supported per MPI rank')
160#if defined(HAVE_HIP) || defined(HAVE_CUDA) || \
161 defined(have_opencl) || defined(have_metal)
179 character(len=*),
intent(inout) :: name
188 call metal_device_name(name)
209 type(c_ptr),
intent(inout) :: x_d
210 integer(c_size_t) :: s
220 call neko_error(
'Memory allocation on device failed')
224 call neko_error(
'Memory allocation on device failed')
229 call neko_error(
'Memory allocation on device failed')
232 if (metalalloc(x_d, s) .ne. metalsuccess)
then
233 call neko_error(
'Memory allocation on device failed')
240 type(c_ptr),
intent(inout) :: x_d
243 call neko_error(
'Memory deallocation on device failed')
247 call neko_error(
'Memory deallocation on device failed')
251 call neko_error(
'Memory deallocation on device failed')
254 if (metalfree(x_d) .ne. metalsuccess)
then
255 call neko_error(
'Memory deallocation on device failed')
263 type(c_ptr),
intent(inout) :: x_d
264 integer(c_int),
target,
value :: v
265 integer(c_size_t),
intent(in) :: s
266 logical,
optional :: sync
267 type(c_ptr),
optional :: strm
268 type(c_ptr) :: stream
269 logical :: sync_device
271 if (
present(sync))
then
274 sync_device = .false.
277 if (
present(strm))
then
293 s, 0, c_null_ptr, c_null_ptr) .ne.
cl_success)
then
297 if (metalmemset(x_d, v, s) .ne. metalsuccess)
then
302 if (sync_device)
then
310 integer,
intent(in) :: n
311 class(*),
intent(inout),
target :: x(:)
312 type(c_ptr),
intent(inout) :: x_d
313 integer,
intent(in),
value :: dir
315 type(c_ptr),
optional :: strm
316 type(c_ptr) :: ptr_h, copy_stream
317 integer(c_size_t) :: s
319 if (
present(strm))
then
327 s = n * int(4, c_size_t)
329 type is (
integer(i8))
330 s = n * int(8, c_size_t)
333 s = n * int(4, c_size_t)
335 type is (double precision)
336 s = n * int(8, c_size_t)
348 integer,
intent(in) :: n
349 class(*),
intent(inout),
target :: x(:,:)
350 type(c_ptr),
intent(inout) :: x_d
351 integer,
intent(in),
value :: dir
353 type(c_ptr),
optional :: strm
354 type(c_ptr) :: ptr_h, copy_stream
355 integer(c_size_t) :: s
357 if (
present(strm))
then
365 s = n * int(4, c_size_t)
367 type is (
integer(i8))
368 s = n * int(8, c_size_t)
371 s = n * int(4, c_size_t)
373 type is (double precision)
374 s = n * int(8, c_size_t)
386 integer,
intent(in) :: n
387 class(*),
intent(inout),
target :: x(:,:,:)
388 type(c_ptr),
intent(inout) :: x_d
389 integer,
intent(in),
value :: dir
391 type(c_ptr),
optional :: strm
392 type(c_ptr) :: ptr_h, copy_stream
393 integer(c_size_t) :: s
395 if (
present(strm))
then
403 s = n * int(4, c_size_t)
405 type is (
integer(i8))
406 s = n * int(8, c_size_t)
409 s = n * int(4, c_size_t)
411 type is (double precision)
412 s = n * int(8, c_size_t)
424 integer,
intent(in) :: n
425 class(*),
intent(inout),
target :: x(:,:,:,:)
426 type(c_ptr),
intent(inout) :: x_d
427 integer,
intent(in),
value :: dir
429 type(c_ptr),
optional :: strm
430 type(c_ptr) :: ptr_h, copy_stream
431 integer(c_size_t) :: s
433 if (
present(strm))
then
441 s = n * int(4, c_size_t)
443 type is (
integer(i8))
444 s = n * int(8, c_size_t)
447 s = n * int(4, c_size_t)
449 type is (double precision)
450 s = n * int(8, c_size_t)
464 type(c_ptr),
intent(inout) :: dst
465 type(c_ptr),
intent(inout) :: src
466 integer(c_size_t),
intent(in) :: s
467 integer,
intent(in),
value :: dir
468 logical,
optional :: sync
469 type(c_ptr),
optional :: strm
470 type(c_ptr) :: copy_stream
471 logical :: sync_device
473 if (
present(sync))
then
476 sync_device = .false.
479 if (
present(strm))
then
493 type(c_ptr),
intent(inout) :: ptr_h
494 type(c_ptr),
intent(inout) :: x_d
495 integer(c_size_t),
intent(in) :: s
496 integer,
intent(in),
value :: dir
497 logical,
intent(in) :: sync_device
498 type(c_ptr),
intent(inout) :: stream
501 if (sync_device)
then
511 call neko_error(
'Device memcpy async (host-to-device) failed')
516 call neko_error(
'Device memcpy async (device-to-host) failed')
521 call neko_error(
'Device memcpy async (device-to-device) failed')
524 call neko_error(
'Device memcpy failed (invalid direction')
526 if (sync_device)
then
533 call neko_error(
'Device memcpy async (host-to-device) failed')
538 call neko_error(
'Device memcpy async (device-to-host) failed')
543 call neko_error(
'Device memcpy async (device-to-device) failed')
546 call neko_error(
'Device memcpy failed (invalid direction')
548 if (sync_device)
then
552 if (sync_device)
then
555 ptr_h, 0, c_null_ptr, c_null_ptr) &
557 call neko_error(
'Device memcpy (host-to-device) failed')
561 0, c_null_ptr, c_null_ptr) &
563 call neko_error(
'Device memcpy (device-to-host) failed')
567 0, c_null_ptr, c_null_ptr) &
569 call neko_error(
'Device memcpy (device-to-device) failed')
572 call neko_error(
'Device memcpy failed (invalid direction')
577 ptr_h, 0, c_null_ptr, c_null_ptr) &
579 call neko_error(
'Device memcpy (host-to-device) failed')
583 0, c_null_ptr, c_null_ptr) &
585 call neko_error(
'Device memcpy (device-to-host) failed')
589 0, c_null_ptr, c_null_ptr) &
591 call neko_error(
'Device memcpy (device-to-device) failed')
594 call neko_error(
'Device memcpy failed (invalid direction')
601 if (metalmemcpyhtod(x_d, ptr_h, s) .ne. metalsuccess)
then
602 call neko_error(
'Device memcpy (host-to-device) failed')
605 if (metalmemcpydtoh(ptr_h, x_d, s) .ne. metalsuccess)
then
606 call neko_error(
'Device memcpy (device-to-host) failed')
609 if (metalmemcpydtod(ptr_h, x_d, s) .ne. metalsuccess)
then
610 call neko_error(
'Device memcpy (device-to-device) failed')
613 call neko_error(
'Device memcpy failed (invalid direction')
620 class(*),
intent(inout),
target :: x(:)
621 type(c_ptr),
intent(inout) :: x_d
622 integer,
intent(in),
optional :: n
623 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
632 if (n_ .eq. 0)
return
633 if (.not. c_associated(x_d))
call neko_error(
'Attempting to associate' // &
634 ' to a null device pointer for a non-empty array')
638 htbl_ptr_h%ptr = c_loc(x)
639 type is (
integer(i8))
640 htbl_ptr_h%ptr = c_loc(x)
642 htbl_ptr_h%ptr = c_loc(x)
643 type is (double precision)
644 htbl_ptr_h%ptr = c_loc(x)
657 class(*),
intent(inout),
target :: x(:,:)
658 type(c_ptr),
intent(inout) :: x_d
659 integer,
intent(in),
optional :: n
660 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
669 if (n_ .eq. 0)
return
670 if (.not. c_associated(x_d))
call neko_error(
'Attempting to associate' // &
671 ' to a null device pointer for a non-empty array')
675 htbl_ptr_h%ptr = c_loc(x)
676 type is (
integer(i8))
677 htbl_ptr_h%ptr = c_loc(x)
679 htbl_ptr_h%ptr = c_loc(x)
680 type is (double precision)
681 htbl_ptr_h%ptr = c_loc(x)
694 class(*),
intent(inout),
target :: x(:,:,:)
695 type(c_ptr),
intent(inout) :: x_d
696 integer,
intent(in),
optional :: n
697 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
706 if (n_ .eq. 0)
return
707 if (.not. c_associated(x_d))
call neko_error(
'Attempting to associate' // &
708 ' to a null device pointer for a non-empty array')
711 htbl_ptr_h%ptr = c_loc(x)
712 type is (
integer(i8))
713 htbl_ptr_h%ptr = c_loc(x)
715 htbl_ptr_h%ptr = c_loc(x)
716 type is (double precision)
717 htbl_ptr_h%ptr = c_loc(x)
730 class(*),
intent(inout),
target :: x(:,:,:,:)
731 type(c_ptr),
intent(inout) :: x_d
732 integer,
intent(in),
optional :: n
733 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
742 if (n_ .eq. 0)
return
743 if (.not. c_associated(x_d))
call neko_error(
'Attempting to associate' // &
744 ' to a null device pointer for a non-empty array')
748 htbl_ptr_h%ptr = c_loc(x)
749 type is (
integer(i8))
750 htbl_ptr_h%ptr = c_loc(x)
752 htbl_ptr_h%ptr = c_loc(x)
753 type is (double precision)
754 htbl_ptr_h%ptr = c_loc(x)
767 class(*),
intent(inout),
target :: x(:)
768 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
772 htbl_ptr_h%ptr = c_loc(x)
773 type is (
integer(i8))
774 htbl_ptr_h%ptr = c_loc(x)
776 htbl_ptr_h%ptr = c_loc(x)
777 type is (double precision)
778 htbl_ptr_h%ptr = c_loc(x)
791 class(*),
intent(inout),
target :: x(:,:)
792 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
796 htbl_ptr_h%ptr = c_loc(x)
797 type is (
integer(i8))
798 htbl_ptr_h%ptr = c_loc(x)
800 htbl_ptr_h%ptr = c_loc(x)
801 type is (double precision)
802 htbl_ptr_h%ptr = c_loc(x)
815 class(*),
intent(inout),
target :: x(:,:,:)
816 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
820 htbl_ptr_h%ptr = c_loc(x)
821 type is (
integer(i8))
822 htbl_ptr_h%ptr = c_loc(x)
824 htbl_ptr_h%ptr = c_loc(x)
825 type is (double precision)
826 htbl_ptr_h%ptr = c_loc(x)
839 class(*),
intent(inout),
target :: x(:,:,:,:)
840 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
844 htbl_ptr_h%ptr = c_loc(x)
845 type is (
integer(i8))
846 htbl_ptr_h%ptr = c_loc(x)
848 htbl_ptr_h%ptr = c_loc(x)
849 type is (double precision)
850 htbl_ptr_h%ptr = c_loc(x)
863 integer,
intent(in) :: n
864 class(*),
intent(inout),
target :: x(:)
865 type(c_ptr),
intent(inout) :: x_d
866 integer(c_size_t) :: s
868 if (c_associated(x_d))
then
869 call neko_error(
'Device pointer already associated')
874 s = n * int(4, c_size_t)
875 type is (
integer(i8))
876 s = n * int(8, c_size_t)
878 s = n * int(4, c_size_t)
879 type is (double precision)
880 s = n * int(8, c_size_t)
892 integer,
intent(in) :: n
893 class(*),
intent(inout),
target :: x(:,:)
894 type(c_ptr),
intent(inout) :: x_d
895 integer(c_size_t) :: s
897 if (c_associated(x_d))
then
898 call neko_error(
'Device pointer already associated')
903 s = n * int(4, c_size_t)
904 type is (
integer(i8))
905 s = n * int(8, c_size_t)
907 s = n * int(4, c_size_t)
908 type is (double precision)
909 s = n * int(8, c_size_t)
921 integer,
intent(in) :: n
922 class(*),
intent(inout),
target :: x(:,:,:)
923 type(c_ptr),
intent(inout) :: x_d
924 integer(c_size_t) :: s
926 if (c_associated(x_d))
then
927 call neko_error(
'Device pointer already associated')
932 s = n * int(4, c_size_t)
933 type is (
integer(i8))
934 s = n * int(8, c_size_t)
936 s = n * int(4, c_size_t)
937 type is (double precision)
938 s = n * int(8, c_size_t)
950 integer,
intent(in) :: n
951 class(*),
intent(inout),
target :: x(:,:,:,:)
952 type(c_ptr),
intent(inout) :: x_d
953 integer(c_size_t) :: s
955 if (c_associated(x_d))
then
956 call neko_error(
'Device pointer already associated')
961 s = n * int(4, c_size_t)
962 type is (
integer(i8))
963 s = n * int(8, c_size_t)
965 s = n * int(4, c_size_t)
966 type is (double precision)
967 s = n * int(8, c_size_t)
979 class(*),
intent(inout),
target :: x(:)
980 type(c_ptr),
intent(inout) :: x_d
988 if ((.not. mapped) .and. (.not. c_associated(x_d)))
then
1003 if ((.not. mapped) .or. (.not. c_associated(x_d)) .or. &
1004 (.not. c_associated(dev, x_d)))
then
1005 call neko_error(
'Inconsistent host/device mapping state in ' // &
1016 class(*),
intent(inout),
target :: x(:,:)
1017 type(c_ptr),
intent(inout) :: x_d
1025 if ((.not. mapped) .and. (.not. c_associated(x_d)))
then
1040 if ((.not. mapped) .or. (.not. c_associated(x_d)) .or. &
1041 (.not. c_associated(dev, x_d)))
then
1042 call neko_error(
'Inconsistent host/device mapping state in ' // &
1053 class(*),
intent(inout),
target :: x(:,:,:)
1054 type(c_ptr),
intent(inout) :: x_d
1062 if ((.not. mapped) .and. (.not. c_associated(x_d)))
then
1077 if ((.not. mapped) .or. (.not. c_associated(x_d)) .or. &
1078 (.not. c_associated(dev, x_d)))
then
1079 call neko_error(
'Inconsistent host/device mapping state in ' // &
1090 class(*),
intent(inout),
target :: x(:,:,:,:)
1091 type(c_ptr),
intent(inout) :: x_d
1099 if ((.not. mapped) .and. (.not. c_associated(x_d)))
then
1114 if ((.not. mapped) .or. (.not. c_associated(x_d)) .or. &
1115 (.not. c_associated(dev, x_d)))
then
1116 call neko_error(
'Inconsistent host/device mapping state in ' // &
1127 class(*),
intent(inout),
target :: x(:)
1128 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1133 htbl_ptr_h%ptr = c_loc(x)
1134 type is (
integer(
i8))
1135 htbl_ptr_h%ptr = c_loc(x)
1137 htbl_ptr_h%ptr = c_loc(x)
1138 type is (double precision)
1139 htbl_ptr_h%ptr = c_loc(x)
1154 class(*),
intent(inout),
target :: x(:,:)
1155 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1160 htbl_ptr_h%ptr = c_loc(x)
1161 type is (
integer(
i8))
1162 htbl_ptr_h%ptr = c_loc(x)
1164 htbl_ptr_h%ptr = c_loc(x)
1165 type is (double precision)
1166 htbl_ptr_h%ptr = c_loc(x)
1181 class(*),
intent(inout),
target :: x(:,:,:)
1182 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1187 htbl_ptr_h%ptr = c_loc(x)
1188 type is (
integer(
i8))
1189 htbl_ptr_h%ptr = c_loc(x)
1191 htbl_ptr_h%ptr = c_loc(x)
1192 type is (double precision)
1193 htbl_ptr_h%ptr = c_loc(x)
1208 class(*),
intent(inout),
target :: x(:,:,:,:)
1209 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1214 htbl_ptr_h%ptr = c_loc(x)
1215 type is (
integer(
i8))
1216 htbl_ptr_h%ptr = c_loc(x)
1218 htbl_ptr_h%ptr = c_loc(x)
1219 type is (double precision)
1220 htbl_ptr_h%ptr = c_loc(x)
1235 class(*),
intent(in),
target :: x(:)
1236 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1243 htbl_ptr_h%ptr = c_loc(x)
1244 type is (
integer(
i8))
1245 htbl_ptr_h%ptr = c_loc(x)
1247 htbl_ptr_h%ptr = c_loc(x)
1248 type is (double precision)
1249 htbl_ptr_h%ptr = c_loc(x)
1257 call neko_error(
'Array not associated with device')
1263 class(*),
intent(in),
target :: x(:,:)
1264 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1271 htbl_ptr_h%ptr = c_loc(x)
1272 type is (
integer(
i8))
1273 htbl_ptr_h%ptr = c_loc(x)
1275 htbl_ptr_h%ptr = c_loc(x)
1276 type is (double precision)
1277 htbl_ptr_h%ptr = c_loc(x)
1285 call neko_error(
'Array not associated with device')
1291 class(*),
intent(in),
target :: x(:,:,:)
1292 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1299 htbl_ptr_h%ptr = c_loc(x)
1300 type is (
integer(
i8))
1301 htbl_ptr_h%ptr = c_loc(x)
1303 htbl_ptr_h%ptr = c_loc(x)
1304 type is (double precision)
1305 htbl_ptr_h%ptr = c_loc(x)
1313 call neko_error(
'Array not associated with device')
1319 class(*),
intent(in),
target :: x(:,:,:,:)
1320 type(
h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1327 htbl_ptr_h%ptr = c_loc(x)
1328 type is (
integer(
i8))
1329 htbl_ptr_h%ptr = c_loc(x)
1331 htbl_ptr_h%ptr = c_loc(x)
1332 type is (double precision)
1333 htbl_ptr_h%ptr = c_loc(x)
1341 call neko_error(
'Array not associated with device')
1360 if (metaldevicesynchronize() .ne. metalsuccess)
then
1368 type(c_ptr),
intent(in) :: stream
1382 if (metalstreamsynchronize(stream) .ne. metalsuccess)
then
1390 type(c_ptr),
intent(inout) :: stream
1391 integer,
optional :: flags
1394 if (
present(flags))
then
1396 call neko_error(
'Error during stream create (w. flags)')
1400 call neko_error(
'Error during stream create')
1404 if (
present(flags))
then
1406 call neko_error(
'Error during stream create (w. flags)')
1410 call neko_error(
'Error during stream create')
1416 call neko_error(
'Error during stream create')
1419 if (metalstreamcreate(stream) .ne. metalsuccess)
then
1420 call neko_error(
'Error during stream create')
1427 type(c_ptr),
intent(inout) :: stream
1428 integer,
intent(in) :: flags, prio
1431 call neko_error(
'Error during stream create (w. priority)')
1435 call neko_error(
'Error during stream create (w. priority)')
1441 if (metalstreamcreate(stream) .ne. metalsuccess)
then
1442 call neko_error(
'Error during stream create (w. priority)')
1449 type(c_ptr),
intent(inout) :: stream
1452 call neko_error(
'Error during stream destroy')
1456 call neko_error(
'Error during stream destroy')
1460 call neko_error(
'Error during stream destroy')
1463 if (metalstreamdestroy(stream) .ne. metalsuccess)
then
1464 call neko_error(
'Error during stream destroy')
1471 type(c_ptr),
intent(in) :: stream
1472 type(c_ptr),
target,
intent(in) :: event
1490 if (metalstreamwaitevent(stream, event) .ne. metalsuccess)
then
1516 type(c_ptr),
intent(inout) :: event
1517 integer,
optional :: flags
1520 if (
present(flags))
then
1522 call neko_error(
'Error during event create (w. flags)')
1530 if (
present(flags))
then
1532 call neko_error(
'Error during event create (w. flags)')
1542 if (metaleventcreate(event) .ne. metalsuccess)
then
1550 type(c_ptr),
intent(inout) :: event
1553 call neko_error(
'Error during event destroy')
1557 call neko_error(
'Error during event destroy')
1562 if (metaleventdestroy(event) .ne. metalsuccess)
then
1563 call neko_error(
'Error during event destroy')
1571 type(c_ptr),
target,
intent(in) :: event
1572 type(c_ptr),
intent(in) :: stream
1586 if (metaleventrecord(event, stream) .ne. metalsuccess)
then
1594 type(c_ptr),
target,
intent(in) :: event
1604 if (c_associated(event))
then
1610 if (c_associated(event))
then
1611 if (metaleventsynchronize(event) .ne. metalsuccess)
then
Associate a Fortran array to a (allocated) device pointer.
Check if a Fortran array is associated with a device pointer.
Deassociate a Fortran array from a device pointer.
Return the device pointer for an associated Fortran array.
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
Synchronize a device or stream.
Unmap a Fortran array from a device (deassociate and free)
subroutine cuda_device_name(name)
subroutine cuda_finalize(glb_cmd_queue, aux_cmd_queue)
@ cudamemcpydevicetodevice
integer function cuda_device_count()
Return the number of available CUDA devices.
subroutine cuda_init(glb_cmd_queue, aux_cmd_queue, strm_high_prio, strm_low_prio)
Device abstraction, common interface for various accelerators.
subroutine, public device_event_record(event, stream)
Record a device event.
subroutine device_associate_r4(x, x_d, n)
Associate a Fortran rank 4 array to a (allocated) device pointer.
subroutine, public device_event_sync(event)
Synchronize an event.
subroutine device_associate_r1(x, x_d, n)
Associate a Fortran rank 1 array to a (allocated) device pointer.
subroutine, public device_finalize
integer, parameter, public device_to_device
type(c_ptr) function device_get_ptr_r4(x)
Return the device pointer for an associated Fortran rank 4 array.
type(c_ptr) function device_get_ptr_r1(x)
Return the device pointer for an associated Fortran rank 1 array.
integer, public strm_low_prio
Low priority stream setting.
integer, parameter, public host_to_device
subroutine device_map_r3(x, x_d, n)
Map a Fortran rank 3 array to a device (allocate and associate)
type(c_ptr), bind(C), public prf_cmd_queue
Profiling command queue.
subroutine, private device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
Copy data between host and device.
logical function device_associated_r3(x)
Check if a Fortran rank 3 array is associated with a device pointer.
subroutine device_unmap_r4(x, x_d)
Unmap a Fortran rank 4 array from a device (deassociate and free)
type(htable_cptr_t), private device_addrtbl
Table of host to device address mappings.
subroutine, public device_profiler_stop()
Stop device profiling.
subroutine device_deassociate_r3(x)
Deassociate a Fortran rank 3 array from a device pointer.
subroutine, public device_sync_stream(stream)
Synchronize a device stream.
type(c_ptr) function device_get_ptr_r3(x)
Return the device pointer for an associated Fortran rank 3 array.
subroutine device_unmap_r2(x, x_d)
Unmap a Fortran rank 2 array from a device (deassociate and free)
subroutine, public device_profiler_start()
Start device profiling.
subroutine device_map_r2(x, x_d, n)
Map a Fortran rank 2 array to a device (allocate and associate)
subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 2 arrays)
subroutine device_map_r4(x, x_d, n)
Map a Fortran rank 4 array to a device (allocate and associate)
subroutine, public device_free(x_d)
Deallocate memory on the device.
integer, parameter, public device_to_host
subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
Copy data between host and device (or device and device) (c-pointers)
subroutine, public device_event_destroy(event)
Destroy a device event.
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
subroutine device_associate_r2(x, x_d, n)
Associate a Fortran rank 2 array to a (allocated) device pointer.
subroutine, public device_stream_create_with_priority(stream, flags, prio)
Create a device stream/command queue with priority.
subroutine, public device_stream_create(stream, flags)
Create a device stream/command queue.
subroutine device_deassociate_r4(x)
Deassociate a Fortran rank 4 array from a device pointer.
subroutine device_sync_device()
Synchronize the device.
subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 4 arrays)
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
subroutine device_map_r1(x, x_d, n)
Map a Fortran rank 1 array to a device (allocate and associate)
subroutine device_associate_r3(x, x_d, n)
Associate a Fortran rank 3 array to a (allocated) device pointer.
subroutine device_unmap_r1(x, x_d)
Unmap a Fortran rank 1 array from a device (deassociate and free)
subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 1 arrays)
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
subroutine, public device_event_create(event, flags)
Create a device event queue.
integer function, public device_count()
Return the number of available devices.
subroutine, public device_name(name)
logical function device_associated_r4(x)
Check if a Fortran rank 4 array is associated with a device pointer.
logical function device_associated_r2(x)
Check if a Fortran rank 2 array is associated with a device pointer.
integer, public strm_high_prio
High priority stream setting.
type(c_ptr), bind(C), public aux_cmd_queue
Aux command queue.
type(c_ptr) function device_get_ptr_r2(x)
Return the device pointer for an associated Fortran rank 2 array.
subroutine device_unmap_r3(x, x_d)
Unmap a Fortran rank 3 array from a device (deassociate and free)
subroutine device_deassociate_r1(x)
Deassociate a Fortran rank 1 array from a device pointer.
type(c_ptr), bind(C), public glb_cmd_event
Event for the global command queue.
subroutine device_deassociate_r2(x)
Deassociate a Fortran rank 2 array from a device pointer.
subroutine, public device_init
logical function device_associated_r1(x)
Check if a Fortran rank 1 array is associated with a device pointer.
subroutine, public device_memset(x_d, v, s, sync, strm)
Set memory on the device to a value.
subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 3 arrays)
subroutine, public device_stream_destroy(stream)
Destroy a device stream/command queue.
subroutine hip_device_name(name)
@ hipmemcpydevicetodevice
subroutine hip_init(glb_cmd_queue, aux_cmd_queue, strm_high_prio, strm_low_prio)
subroutine hip_finalize(glb_cmd_queue, aux_cmd_queue)
integer function hip_device_count()
Return the number of available HIP devices.
Implements a hash table ADT.
integer, parameter neko_bcknd_device
integer, parameter, public i8
Fortran OpenCL interface.
subroutine opencl_device_name(name)
subroutine opencl_finalize(glb_cmd_queue, aux_cmd_queue, prf_cmd_queue)
integer function opencl_device_count()
Return the number of OpenCL devices.
subroutine opencl_init(glb_cmd_queue, aux_cmd_queue, prf_cmd_queue)
OpenCL JIT program library.
subroutine, public opencl_prgm_lib_release
C pointer based hash table.