Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
cuda_intf.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2022, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use utils, only : neko_error
36 use, intrinsic :: iso_c_binding
37 !$ use omp_lib
38 implicit none
39
40#ifdef HAVE_CUDA
41
43 type(c_ptr), bind(c) :: glb_cmd_queue = c_null_ptr
44
46 type(c_ptr), bind(c) :: aux_cmd_queue = c_null_ptr
47
49 integer :: strm_high_prio
50
52 integer :: strm_low_prio
53
55 enum, bind(c)
56 enumerator :: cudasuccess = 0
57 enumerator :: cudaerrorinvalidvalue = 1
58 enumerator :: cudaerrormemoryallocation = 2
60 end enum
61
63 enum, bind(c)
64 enumerator :: cudamemcpyhosttohost = 0
65 enumerator :: cudamemcpyhosttodevice = 1
66 enumerator :: cudamemcpydevicetohost = 2
67 enumerator :: cudamemcpydevicetodevice = 3
68 enumerator :: cudamemcpydefault = 4
69 end enum
70
71 interface
72 integer(c_int) function cudamalloc(ptr_d, s) &
73 bind(c, name = 'cudaMalloc')
74 use, intrinsic :: iso_c_binding
75 implicit none
76 type(c_ptr) :: ptr_d
77 integer(c_size_t), value :: s
78 end function cudamalloc
79 end interface
80
81 interface
82 integer(c_int) function cudafree(ptr_d) &
83 bind(c, name = 'cudaFree')
84 use, intrinsic :: iso_c_binding
85 implicit none
86 type(c_ptr), value :: ptr_d
87 end function cudafree
88 end interface
89
90 interface
91 integer(c_int) function cudamemcpy(ptr_dst, ptr_src, s, dir) &
92 bind(c, name = 'cudaMemcpy')
93 use, intrinsic :: iso_c_binding
94 implicit none
95 type(c_ptr), value :: ptr_dst, ptr_src
96 integer(c_size_t), value :: s
97 integer(c_int), value :: dir
98 end function cudamemcpy
99 end interface
100
101 interface
102 integer(c_int) function cudamemcpyasync(ptr_dst, ptr_src, s, dir, stream) &
103 bind(c, name = 'cudaMemcpyAsync')
104 use, intrinsic :: iso_c_binding
105 implicit none
106 type(c_ptr), value :: ptr_dst, ptr_src, stream
107 integer(c_size_t), value :: s
108 integer(c_int), value :: dir
109 end function cudamemcpyasync
110 end interface
111
112 interface
113 integer(c_int) function cudadevicesynchronize() &
114 bind(c, name = 'cudaDeviceSynchronize')
115 use, intrinsic :: iso_c_binding
116 implicit none
117 end function cudadevicesynchronize
118 end interface
119
120 interface
121 integer(c_int) function cudagetdeviceproperties(prop, device) &
122 bind(c, name = 'cudaGetDeviceProperties')
123 use, intrinsic :: iso_c_binding
124 implicit none
125 type(c_ptr), value :: prop
126 integer(c_int), value :: device
127 end function cudagetdeviceproperties
128 end interface
129
130 interface
131 integer(c_int) function cudastreamcreate(stream) &
132 bind(c, name = 'cudaStreamCreate')
133 use, intrinsic :: iso_c_binding
134 implicit none
135 type(c_ptr) :: stream
136 end function cudastreamcreate
137 end interface
138
139 interface
140 integer(c_int) function cudastreamcreatewithflags(stream, flags) &
141 bind(c, name = 'cudaStreamCreateWithFlags')
142 use, intrinsic :: iso_c_binding
143 implicit none
144 type(c_ptr) :: stream
145 integer(c_int), value :: flags
146 end function cudastreamcreatewithflags
147 end interface
148
149 interface
150 integer(c_int) function cudastreamcreatewithpriority(stream, flags, prio) &
151 bind(c, name = 'cudaStreamCreateWithPriority')
152 use, intrinsic :: iso_c_binding
153 implicit none
154 type(c_ptr) :: stream
155 integer(c_int), value :: flags, prio
157 end interface
158
159 interface
160 integer(c_int) function cudastreamdestroy(steam) &
161 bind(c, name = 'cudaStreamDestroy')
162 use, intrinsic :: iso_c_binding
163 implicit none
164 type(c_ptr), value :: steam
165 end function cudastreamdestroy
166 end interface
167
168 interface
169 integer(c_int) function cudastreamsynchronize(stream) &
170 bind(c, name = 'cudaStreamSynchronize')
171 use, intrinsic :: iso_c_binding
172 implicit none
173 type(c_ptr), value :: stream
174 end function cudastreamsynchronize
175 end interface
176
177 interface
178 integer(c_int) function cudastreamwaitevent(stream, event, flags) &
179 bind(c, name = 'cudaStreamWaitEvent')
180 use, intrinsic :: iso_c_binding
181 implicit none
182 type(c_ptr), value :: stream, event
183 integer(c_int), value :: flags
184 end function cudastreamwaitevent
185 end interface
186
187 interface
188 integer(c_int) function cudadevicegetstreampriorityrange &
189 (low_prio, high_prio) &
190 bind(c, name = 'cudaDeviceGetStreamPriorityRange')
191 use, intrinsic :: iso_c_binding
192 implicit none
193 integer(c_int) :: low_prio, high_prio
195 end interface
196
197 interface
198 integer(c_int) function cudaprofilerstart() &
199 bind(c, name = 'cudaProfilerStart')
200 use, intrinsic :: iso_c_binding
201 implicit none
202 end function cudaprofilerstart
203 end interface
204
205 interface
206 integer(c_int) function cudaprofilerstop() &
207 bind(c, name = 'cudaProfilerStop')
208 use, intrinsic :: iso_c_binding
209 implicit none
210 end function cudaprofilerstop
211 end interface
212
213 interface
214 integer(c_int) function cudaeventcreate(event) &
215 bind(c, name = 'cudaEventCreate')
216 use, intrinsic :: iso_c_binding
217 implicit none
218 type(c_ptr) :: event
219 end function cudaeventcreate
220 end interface
221
222 interface
223 integer(c_int) function cudaeventdestroy(event) &
224 bind(c, name = 'cudaEventDestroy')
225 use, intrinsic :: iso_c_binding
226 implicit none
227 type(c_ptr), value :: event
228 end function cudaeventdestroy
229 end interface
230
231 interface
232 integer(c_int) function cudaeventcreatewithflags(event, flags) &
233 bind(c, name = 'cudaEventCreateWithFlags')
234 use, intrinsic :: iso_c_binding
235 implicit none
236 type(c_ptr) :: event
237 integer(c_int), value :: flags
238 end function cudaeventcreatewithflags
239 end interface
240
241 interface
242 integer(c_int) function cudaeventrecord(event, stream) &
243 bind(c, name = 'cudaEventRecord')
244 use, intrinsic :: iso_c_binding
245 implicit none
246 type(c_ptr), value :: event, stream
247 end function cudaeventrecord
248 end interface
249
250 interface
251 integer(c_int) function cudaeventsynchronize(event) &
252 bind(c, name = 'cudaEventSynchronize')
253 use, intrinsic :: iso_c_binding
254 implicit none
255 type(c_ptr), value :: event
256 end function cudaeventsynchronize
257 end interface
258
259 interface
260 integer(c_int) function cudagetdevicecount(device_count) &
261 bind(c, name = 'cudaGetDeviceCount')
262 use, intrinsic :: iso_c_binding
263 implicit none
264 integer(c_int) :: device_count
265 end function cudagetdevicecount
266 end interface
267
268 interface
269 integer(c_int) function cudagetdevice(device) &
270 bind(c, name = 'cudaGetDevice')
271 use, intrinsic :: iso_c_binding
272 implicit none
273 integer(c_int) :: device
274 end function cudagetdevice
275 end interface
276
277 interface
278 integer(c_int) function cudasetdevice(device) &
279 bind(c, name = 'cudaSetDevice')
280 use, intrinsic :: iso_c_binding
281 implicit none
282 integer(c_int), value :: device
283 end function cudasetdevice
284 end interface
285
286contains
287
288 subroutine cuda_init
289 integer(c_int) :: device_id
290 integer :: nthrds = 1
291
292 !$omp parallel
293 !$omp master
294 !$ nthrds = omp_get_num_threads()
295 !$omp end master
296 !$omp end parallel
297
298 ! Ensure that all threads are assigned to the same device
299 if (nthrds .gt. 1) then
300 if (cudagetdevice(device_id) .ne. cudasuccess) then
301 call neko_error('Error retrieving device id')
302 end if
303
304 !$omp parallel
305 if (cudasetdevice(device_id) .ne. cudasuccess) then
306 call neko_error('Error setting device id')
307 end if
308 !$omp end parallel
309 end if
310
312 .ne. cudasuccess) then
313 call neko_error('Error retrieving stream priority range')
314 end if
315
317 .ne. cudasuccess) then
318 call neko_error('Error creating main stream')
319 end if
320
322 .ne. cudasuccess) then
323 call neko_error('Error creating aux stream')
324 end if
325 end subroutine cuda_init
326
327 subroutine cuda_finalize
329 call neko_error('Error destroying main stream')
330 end if
331
333 call neko_error('Error destroying aux stream')
334 end if
335 end subroutine cuda_finalize
336
337 subroutine cuda_device_name(name)
338 character(len=*), intent(inout) :: name
339 character(kind=c_char, len=8192), target :: prop
340 integer :: end_pos
341
342 !
343 ! Yes this is an ugly hack!
344 ! Since we're only interested in the device name (first 256 bytes)
345 ! we pass down a large enough chunk of memory to the cuda runtime
346 ! and extract what we need later on
347 !
348 ! This will of course break if sizeof(cudaDeviceProp) > 8192
349 !
350
351 if (cudagetdeviceproperties(c_loc(prop), 0) .ne. cudasuccess) then
352 call neko_error('Failed to query device')
353 end if
354
355 end_pos = scan(prop(1:256), c_null_char)
356 if (end_pos .ge. 2) then
357 name(1:end_pos-1) = prop(1:end_pos-1)
358 end if
359 end subroutine cuda_device_name
360
362 integer function cuda_device_count()
363 integer(c_int) :: num_devices
364
365 if (cudagetdevicecount(num_devices) .ne. cudasuccess) then
366 call neko_error('Error retrieving device count')
367 end if
368
369 cuda_device_count = num_devices
370 end function cuda_device_count
371
372#endif
373
374end module cuda_intf
Fortran CUDA interface.
Definition cuda_intf.F90:34
subroutine cuda_device_name(name)
type(c_ptr), bind(C) aux_cmd_queue
Aux HIP command queue.
Definition cuda_intf.F90:46
subroutine cuda_finalize
@ cudamemcpydevicetohost
Definition cuda_intf.F90:66
@ cudamemcpydefault
Definition cuda_intf.F90:68
@ cudamemcpyhosttohost
Definition cuda_intf.F90:64
@ cudamemcpydevicetodevice
Definition cuda_intf.F90:67
@ cudamemcpyhosttodevice
Definition cuda_intf.F90:65
subroutine cuda_init
integer function cuda_device_count()
Return the number of avaialble CUDA devices.
@ cudaerrorinitializationerror
Definition cuda_intf.F90:59
@ cudaerrormemoryallocation
Definition cuda_intf.F90:58
@ cudaerrorinvalidvalue
Definition cuda_intf.F90:57
type(c_ptr), bind(C) glb_cmd_queue
Global HIP command queue.
Definition cuda_intf.F90:43
integer strm_low_prio
Low priority stream setting.
Definition cuda_intf.F90:52
integer strm_high_prio
High priority stream setting.
Definition cuda_intf.F90:49
Device abstraction, common interface for various accelerators.
Definition device.F90:34
Utilities.
Definition utils.f90:35