36 use,
intrinsic :: iso_c_binding
61 bind(c, name =
'cudaMalloc')
62 use,
intrinsic :: iso_c_binding
65 integer(c_size_t),
value :: s
71 bind(c, name =
'cudaFree')
72 use,
intrinsic :: iso_c_binding
74 type(c_ptr),
value :: ptr_d
79 integer(c_int) function cudamemcpy(ptr_dst, ptr_src, s, dir) &
80 bind(c, name =
'cudaMemcpy')
81 use,
intrinsic :: iso_c_binding
83 type(c_ptr),
value :: ptr_dst, ptr_src
84 integer(c_size_t),
value :: s
85 integer(c_int),
value :: dir
91 bind(c, name =
'cudaMemcpyAsync')
92 use,
intrinsic :: iso_c_binding
94 type(c_ptr),
value :: ptr_dst, ptr_src, stream
95 integer(c_size_t),
value :: s
96 integer(c_int),
value :: dir
102 bind(c, name =
'cudaMemsetAsync')
103 use,
intrinsic :: iso_c_binding
105 type(c_ptr),
value :: ptr, stream
106 integer(c_int),
value :: v
107 integer(c_size_t),
value :: s
113 bind(c, name =
'cudaDeviceSynchronize')
114 use,
intrinsic :: iso_c_binding
121 bind(c, name =
'cudaGetDeviceProperties')
122 use,
intrinsic :: iso_c_binding
124 type(c_ptr),
value :: prop
125 integer(c_int),
value ::
device
131 bind(c, name =
'cudaStreamCreate')
132 use,
intrinsic :: iso_c_binding
134 type(c_ptr) :: stream
140 bind(c, name =
'cudaStreamCreateWithFlags')
141 use,
intrinsic :: iso_c_binding
143 type(c_ptr) :: stream
144 integer(c_int),
value :: flags
150 bind(c, name =
'cudaStreamCreateWithPriority')
151 use,
intrinsic :: iso_c_binding
153 type(c_ptr) :: stream
154 integer(c_int),
value :: flags, prio
160 bind(c, name =
'cudaStreamDestroy')
161 use,
intrinsic :: iso_c_binding
163 type(c_ptr),
value :: steam
169 bind(c, name =
'cudaStreamSynchronize')
170 use,
intrinsic :: iso_c_binding
172 type(c_ptr),
value :: stream
178 bind(c, name =
'cudaStreamWaitEvent')
179 use,
intrinsic :: iso_c_binding
181 type(c_ptr),
value :: stream, event
182 integer(c_int),
value :: flags
188 (low_prio, high_prio) &
189 bind(c, name =
'cudaDeviceGetStreamPriorityRange')
190 use,
intrinsic :: iso_c_binding
192 integer(c_int) :: low_prio, high_prio
198 bind(c, name =
'cudaProfilerStart')
199 use,
intrinsic :: iso_c_binding
206 bind(c, name =
'cudaProfilerStop')
207 use,
intrinsic :: iso_c_binding
214 bind(c, name =
'cudaEventCreate')
215 use,
intrinsic :: iso_c_binding
223 bind(c, name =
'cudaEventDestroy')
224 use,
intrinsic :: iso_c_binding
226 type(c_ptr),
value :: event
232 bind(c, name =
'cudaEventCreateWithFlags')
233 use,
intrinsic :: iso_c_binding
236 integer(c_int),
value :: flags
242 bind(c, name =
'cudaEventRecord')
243 use,
intrinsic :: iso_c_binding
245 type(c_ptr),
value :: event, stream
251 bind(c, name =
'cudaEventSynchronize')
252 use,
intrinsic :: iso_c_binding
254 type(c_ptr),
value :: event
260 bind(c, name =
'cudaGetDeviceCount')
261 use,
intrinsic :: iso_c_binding
263 integer(c_int) :: device_count
269 bind(c, name =
'cudaGetDevice')
270 use,
intrinsic :: iso_c_binding
278 bind(c, name =
'cudaSetDevice')
279 use,
intrinsic :: iso_c_binding
281 integer(c_int),
value ::
device
288 STRM_HIGH_PRIO, STRM_LOW_PRIO)
289 type(c_ptr),
intent(inout) :: glb_cmd_queue
290 type(c_ptr),
intent(inout) :: aux_cmd_queue
291 integer,
intent(inout) :: STRM_HIGH_PRIO
292 integer,
intent(inout) :: STRM_LOW_PRIO
293 integer(c_int) :: device_id
294 integer :: nthrds = 1
303 if (nthrds .gt. 1)
then
317 call neko_error(
'Error retrieving stream priority range')
332 type(c_ptr),
intent(inout) :: glb_cmd_queue
333 type(c_ptr),
intent(inout) :: aux_cmd_queue
336 call neko_error(
'Error destroying main stream')
340 call neko_error(
'Error destroying aux stream')
345 character(len=*),
intent(inout) :: name
346 character(kind=c_char, len=8192),
target :: prop
362 end_pos = scan(prop(1:256), c_null_char)
363 if (end_pos .ge. 2)
then
364 name(1:end_pos-1) = prop(1:end_pos-1)
370 integer(c_int) :: num_devices
373 call neko_error(
'Error retrieving device count')
subroutine cuda_device_name(name)
subroutine cuda_finalize(glb_cmd_queue, aux_cmd_queue)
@ cudamemcpydevicetodevice
integer function cuda_device_count()
Return the number of available CUDA devices.
@ cudaerrorinitializationerror
@ cudaerrormemoryallocation
subroutine cuda_init(glb_cmd_queue, aux_cmd_queue, strm_high_prio, strm_low_prio)
Device abstraction, common interface for various accelerators.