Neko  0.8.1
A portable framework for high-order spectral element flow simulations
cuda_intf.F90
Go to the documentation of this file.
1 ! Copyright (c) 2021-2022, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34 module cuda_intf
35  use utils, only : neko_error
36  use, intrinsic :: iso_c_binding
37  !$ use omp_lib
38  implicit none
39 
40 #ifdef HAVE_CUDA
41 
43  type(c_ptr), bind(c) :: glb_cmd_queue = c_null_ptr
44 
46  type(c_ptr), bind(c) :: aux_cmd_queue = c_null_ptr
47 
49  integer :: strm_high_prio
50 
52  integer :: strm_low_prio
53 
55  enum, bind(c)
56  enumerator :: cudasuccess = 0
57  enumerator :: cudaerrorinvalidvalue = 1
58  enumerator :: cudaerrormemoryallocation = 2
59  enumerator :: cudaerrorinitializationerror = 3
60  end enum
61 
63  enum, bind(c)
64  enumerator :: cudamemcpyhosttohost = 0
65  enumerator :: cudamemcpyhosttodevice = 1
66  enumerator :: cudamemcpydevicetohost = 2
67  enumerator :: cudamemcpydevicetodevice = 3
68  enumerator :: cudamemcpydefault = 4
69  end enum
70 
71  interface
72  integer (c_int) function cudamalloc(ptr_d, s) &
73  bind(c, name='cudaMalloc')
74  use, intrinsic :: iso_c_binding
75  implicit none
76  type(c_ptr) :: ptr_d
77  integer(c_size_t), value :: s
78  end function cudamalloc
79  end interface
80 
81  interface
82  integer (c_int) function cudafree(ptr_d) &
83  bind(c, name='cudaFree')
84  use, intrinsic :: iso_c_binding
85  implicit none
86  type(c_ptr), value :: ptr_d
87  end function cudafree
88  end interface
89 
90  interface
91  integer (c_int) function cudamemcpy(ptr_dst, ptr_src, s, dir) &
92  bind(c, name='cudaMemcpy')
93  use, intrinsic :: iso_c_binding
94  implicit none
95  type(c_ptr), value :: ptr_dst, ptr_src
96  integer(c_size_t), value :: s
97  integer(c_int), value :: dir
98  end function cudamemcpy
99  end interface
100 
101  interface
102  integer (c_int) function cudamemcpyasync(ptr_dst, ptr_src, s, dir, stream) &
103  bind(c, name='cudaMemcpyAsync')
104  use, intrinsic :: iso_c_binding
105  implicit none
106  type(c_ptr), value :: ptr_dst, ptr_src, stream
107  integer(c_size_t), value :: s
108  integer(c_int), value :: dir
109  end function cudamemcpyasync
110  end interface
111 
112  interface
113  integer (c_int) function cudadevicesynchronize() &
114  bind(c, name='cudaDeviceSynchronize')
115  use, intrinsic :: iso_c_binding
116  implicit none
117  end function cudadevicesynchronize
118  end interface
119 
120  interface
121  integer (c_int) function cudagetdeviceproperties(prop, device) &
122  bind(c, name='cudaGetDeviceProperties')
123  use, intrinsic :: iso_c_binding
124  implicit none
125  type(c_ptr), value :: prop
126  integer(c_int), value :: device
127  end function cudagetdeviceproperties
128  end interface
129 
130  interface
131  integer (c_int) function cudastreamcreate(stream) &
132  bind(c, name='cudaStreamCreate')
133  use, intrinsic :: iso_c_binding
134  implicit none
135  type(c_ptr) :: stream
136  end function cudastreamcreate
137  end interface
138 
139  interface
140  integer (c_int) function cudastreamcreatewithflags(stream, flags) &
141  bind(c, name='cudaStreamCreateWithFlags')
142  use, intrinsic :: iso_c_binding
143  implicit none
144  type(c_ptr) :: stream
145  integer(c_int), value :: flags
146  end function cudastreamcreatewithflags
147  end interface
148 
149  interface
150  integer (c_int) function cudastreamcreatewithpriority(stream, flags, prio) &
151  bind(c, name='cudaStreamCreateWithPriority')
152  use, intrinsic :: iso_c_binding
153  implicit none
154  type(c_ptr) :: stream
155  integer(c_int), value :: flags, prio
156  end function cudastreamcreatewithpriority
157  end interface
158 
159  interface
160  integer (c_int) function cudastreamdestroy(steam) &
161  bind(c, name='cudaStreamDestroy')
162  use, intrinsic :: iso_c_binding
163  implicit none
164  type(c_ptr), value :: steam
165  end function cudastreamdestroy
166  end interface
167 
168  interface
169  integer (c_int) function cudastreamsynchronize(stream) &
170  bind(c, name='cudaStreamSynchronize')
171  use, intrinsic :: iso_c_binding
172  implicit none
173  type(c_ptr), value :: stream
174  end function cudastreamsynchronize
175  end interface
176 
177  interface
178  integer (c_int) function cudastreamwaitevent(stream, event, flags) &
179  bind(c, name='cudaStreamWaitEvent')
180  use, intrinsic :: iso_c_binding
181  implicit none
182  type(c_ptr), value :: stream, event
183  integer(c_int), value :: flags
184  end function cudastreamwaitevent
185  end interface
186 
187  interface
188  integer (c_int) function cudadevicegetstreampriorityrange(low_prio, high_prio) &
189  bind(c, name='cudaDeviceGetStreamPriorityRange')
190  use, intrinsic :: iso_c_binding
191  implicit none
192  integer(c_int) :: low_prio, high_prio
193  end function cudadevicegetstreampriorityrange
194  end interface
195 
196  interface
197  integer (c_int) function cudaprofilerstart() &
198  bind(c, name='cudaProfilerStart')
199  use, intrinsic :: iso_c_binding
200  implicit none
201  end function cudaprofilerstart
202  end interface
203 
204  interface
205  integer (c_int) function cudaprofilerstop() &
206  bind(c, name='cudaProfilerStop')
207  use, intrinsic :: iso_c_binding
208  implicit none
209  end function cudaprofilerstop
210  end interface
211 
212  interface
213  integer (c_int) function cudaeventcreate(event) &
214  bind(c, name='cudaEventCreate')
215  use, intrinsic :: iso_c_binding
216  implicit none
217  type(c_ptr) :: event
218  end function cudaeventcreate
219  end interface
220 
221  interface
222  integer (c_int) function cudaeventdestroy(event) &
223  bind(c, name='cudaEventDestroy')
224  use, intrinsic :: iso_c_binding
225  implicit none
226  type(c_ptr), value :: event
227  end function cudaeventdestroy
228  end interface
229 
230  interface
231  integer (c_int) function cudaeventcreatewithflags(event, flags) &
232  bind(c, name='cudaEventCreateWithFlags')
233  use, intrinsic :: iso_c_binding
234  implicit none
235  type(c_ptr) :: event
236  integer(c_int), value :: flags
237  end function cudaeventcreatewithflags
238  end interface
239 
240  interface
241  integer (c_int) function cudaeventrecord(event, stream) &
242  bind(c, name='cudaEventRecord')
243  use, intrinsic :: iso_c_binding
244  implicit none
245  type(c_ptr), value :: event, stream
246  end function cudaeventrecord
247  end interface
248 
249  interface
250  integer (c_int) function cudaeventsynchronize(event) &
251  bind(c, name='cudaEventSynchronize')
252  use, intrinsic :: iso_c_binding
253  implicit none
254  type(c_ptr), value :: event
255  end function cudaeventsynchronize
256  end interface
257 
258  interface
259  integer (c_int) function cudagetdevice(device) &
260  bind(c, name='cudaGetDevice')
261  use, intrinsic :: iso_c_binding
262  implicit none
263  integer(c_int) :: device
264  end function cudagetdevice
265  end interface
266 
267  interface
268  integer (c_int) function cudasetdevice(device) &
269  bind(c, name='cudaSetDevice')
270  use, intrinsic :: iso_c_binding
271  implicit none
272  integer(c_int), value :: device
273  end function cudasetdevice
274  end interface
275 
276 contains
277 
278  subroutine cuda_init
279  integer(c_int) :: device_id
280  integer :: nthrds = 1
281 
282  !$omp parallel
283  !$omp master
284  !$ nthrds = omp_get_num_threads()
285  !$omp end master
286  !$omp end parallel
287 
288  ! Ensure that all threads are assigned to the same device
289  if (nthrds .gt. 1) then
290  if (cudagetdevice(device_id) .ne. cudasuccess) then
291  call neko_error('Error retrieving device id')
292  end if
293 
294  !$omp parallel
295  if (cudasetdevice(device_id) .ne. cudasuccess) then
296  call neko_error('Error setting device id')
297  end if
298  !$omp end parallel
299  end if
300 
301  if (cudadevicegetstreampriorityrange(strm_low_prio, strm_high_prio) &
302  .ne. cudasuccess) then
303  call neko_error('Error retrieving stream priority range')
304  end if
305 
306  if (cudastreamcreatewithpriority(glb_cmd_queue, 1, strm_high_prio) &
307  .ne. cudasuccess) then
308  call neko_error('Error creating main stream')
309  end if
310 
311  if (cudastreamcreatewithpriority(aux_cmd_queue, 1, strm_low_prio) &
312  .ne. cudasuccess) then
313  call neko_error('Error creating aux stream')
314  end if
315  end subroutine cuda_init
316 
317  subroutine cuda_finalize
318  if (cudastreamdestroy(glb_cmd_queue) .ne. cudasuccess) then
319  call neko_error('Error destroying main stream')
320  end if
321 
322  if (cudastreamdestroy(aux_cmd_queue) .ne. cudasuccess) then
323  call neko_error('Error destroying aux stream')
324  end if
325  end subroutine cuda_finalize
326 
327  subroutine cuda_device_name(name)
328  character(len=*), intent(inout) :: name
329  character(kind=c_char, len=8192), target :: prop
330  integer :: end_pos
331 
332  !
333  ! Yes this is an ugly hack!
334  ! Since we're only interested in the device name (first 256 bytes)
335  ! we pass down a large enough chunk of memory to the cuda runtime
336  ! and extract what we need later on
337  !
338  ! This will of course break if sizeof(cudaDeviceProp) > 8192
339  !
340 
341  if (cudagetdeviceproperties(c_loc(prop), 0) .ne. cudasuccess) then
342  call neko_error('Failed to query device')
343  end if
344 
345  end_pos = scan(prop(1:256), c_null_char)
346  if(end_pos .ge. 2) then
347  name(1:end_pos-1) = prop(1:end_pos-1)
348  endif
349  end subroutine cuda_device_name
350 
351 #endif
352 
353 end module cuda_intf
Fortran CUDA interface.
Definition: cuda_intf.F90:34
subroutine cuda_device_name(name)
Definition: cuda_intf.F90:328
type(c_ptr), bind(C) aux_cmd_queue
Aux HIP command queue.
Definition: cuda_intf.F90:46
subroutine cuda_finalize
Definition: cuda_intf.F90:318
@ cudamemcpydevicetohost
Definition: cuda_intf.F90:66
@ cudamemcpydefault
Definition: cuda_intf.F90:68
@ cudamemcpyhosttohost
Definition: cuda_intf.F90:64
@ cudamemcpydevicetodevice
Definition: cuda_intf.F90:67
@ cudamemcpyhosttodevice
Definition: cuda_intf.F90:65
subroutine cuda_init
Definition: cuda_intf.F90:279
@ cudaerrorinitializationerror
Definition: cuda_intf.F90:59
@ cudaerrormemoryallocation
Definition: cuda_intf.F90:58
@ cudaerrorinvalidvalue
Definition: cuda_intf.F90:57
type(c_ptr), bind(C) glb_cmd_queue
Global HIP command queue.
Definition: cuda_intf.F90:43
integer strm_low_prio
Low priority stream setting.
Definition: cuda_intf.F90:52
integer strm_high_prio
High priority stream setting.
Definition: cuda_intf.F90:49
Device abstraction, common interface for various accelerators.
Definition: device.F90:34
Utilities.
Definition: utils.f90:35