Neko  0.8.1
A portable framework for high-order spectral element flow simulations
device.F90
Go to the documentation of this file.
1 ! Copyright (c) 2021-2023, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34 module device
35  use num_types, only : i8
36  use opencl_intf
37  use cuda_intf
38  use hip_intf
39  use htable, only : htable_cptr_t, h_cptr_t
40  use utils, only : neko_error
41  use dummy_device
42  use opencl_prgm_lib
43  use, intrinsic :: iso_c_binding
44  implicit none
45 ! private
46 
47  integer, public, parameter :: host_to_device = 1, device_to_host = 2, &
49 
51  interface device_memcpy
52  module procedure device_memcpy_r1, device_memcpy_r2, &
54  end interface device_memcpy
55 
57  interface device_map
58  module procedure device_map_r1, device_map_r2, &
60  end interface device_map
61 
63  interface device_associate
64  module procedure device_associate_r1, device_associate_r2, &
66  end interface device_associate
67 
70  module procedure device_associated_r1, device_associated_r2, &
72  end interface device_associated
73 
76  module procedure device_deassociate_r1, device_deassociate_r2, &
78  end interface device_deassociate
79 
81  interface device_get_ptr
82  module procedure device_get_ptr_r1, device_get_ptr_r2, &
84  end interface device_get_ptr
85 
87  interface device_sync
88  module procedure device_sync_device, device_sync_stream
89  end interface device_sync
90 
92  type(htable_cptr_t), private :: device_addrtbl
93 
101 
102  private :: device_memcpy_common
103 
104 contains
105 
106  subroutine device_init
107 #if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
108  call device_addrtbl%init(64)
109 
110 #ifdef HAVE_HIP
111  call hip_init
112 #elif HAVE_CUDA
113  call cuda_init
114 #elif HAVE_OPENCL
115  call opencl_init
116 #endif
117 
118 #endif
119  end subroutine device_init
120 
121  subroutine device_finalize
122 #if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
123  call device_addrtbl%free()
124 
125 #ifdef HAVE_HIP
126  call hip_finalize
127 #elif HAVE_CUDA
128  call cuda_finalize
129 #elif HAVE_OPENCL
131  call opencl_finalize
132 #endif
133 
134 #endif
135  end subroutine device_finalize
136 
137  subroutine device_name(name)
138  character(len=*), intent(inout) :: name
139 
140 #ifdef HAVE_HIP
141  call hip_device_name(name)
142 #elif HAVE_CUDA
143  call cuda_device_name(name)
144 #elif HAVE_OPENCL
145  call opencl_device_name(name)
146 #endif
147  end subroutine device_name
148 
150  subroutine device_alloc(x_d, s)
151  type(c_ptr), intent(inout) :: x_d
152  integer(c_size_t) :: s
153  integer :: ierr
154 #ifdef HAVE_HIP
155  if (hipmalloc(x_d, s) .ne. hipsuccess) then
156  call neko_error('Memory allocation on device failed')
157  end if
158 #elif HAVE_CUDA
159  if (cudamalloc(x_d, s) .ne. cudasuccess) then
160  call neko_error('Memory allocation on device failed')
161  end if
162 #elif HAVE_OPENCL
163  x_d = clcreatebuffer(glb_ctx, cl_mem_read_write, s, c_null_ptr, ierr)
164  if (ierr .ne. cl_success) then
165  call neko_error('Memory allocation on device failed')
166  end if
167 #endif
168  end subroutine device_alloc
169 
171  subroutine device_free(x_d)
172  type(c_ptr), intent(inout) :: x_d
173 #ifdef HAVE_HIP
174  if (hipfree(x_d) .ne. hipsuccess) then
175  call neko_error('Memory deallocation on device failed')
176  end if
177 #elif HAVE_CUDA
178  if (cudafree(x_d) .ne. cudasuccess) then
179  call neko_error('Memory deallocation on device failed')
180  end if
181 #elif HAVE_OPENCL
182  if (clreleasememobject(x_d) .ne. cl_success) then
183  call neko_error('Memory deallocation on device failed')
184  end if
185 #endif
186  x_d = c_null_ptr
187  end subroutine device_free
188 
190  subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
191  integer, intent(in) :: n
192  class(*), intent(inout), target :: x(:)
193  type(c_ptr), intent(inout) :: x_d
194  integer, intent(in), value :: dir
195  logical :: sync
196  type(c_ptr), optional :: strm
197  type(c_ptr) :: ptr_h, copy_stream
198  integer(c_size_t) :: s
199 
200  if (present(strm)) then
201  copy_stream = strm
202  else
203  copy_stream = glb_cmd_queue
204  end if
205 
206  select type(x)
207  type is (integer)
208  s = n * 4
209  ptr_h = c_loc(x)
210  type is (integer(i8))
211  s = n * 8
212  ptr_h = c_loc(x)
213  type is (real)
214  s = n * 4
215  ptr_h = c_loc(x)
216  type is (double precision)
217  s = n * 8
218  ptr_h = c_loc(x)
219  class default
220  call neko_error('Unknown Fortran type')
221  end select
222 
223  call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
224 
225  end subroutine device_memcpy_r1
226 
228  subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
229  integer, intent(in) :: n
230  class(*), intent(inout), target :: x(:,:)
231  type(c_ptr), intent(inout) :: x_d
232  integer, intent(in), value :: dir
233  logical :: sync
234  type(c_ptr), optional :: strm
235  type(c_ptr) :: ptr_h, copy_stream
236  integer(c_size_t) :: s
237 
238  if (present(strm)) then
239  copy_stream = strm
240  else
241  copy_stream = glb_cmd_queue
242  end if
243 
244  select type(x)
245  type is (integer)
246  s = n * 4
247  ptr_h = c_loc(x)
248  type is (integer(i8))
249  s = n * 8
250  ptr_h = c_loc(x)
251  type is (real)
252  s = n * 4
253  ptr_h = c_loc(x)
254  type is (double precision)
255  s = n * 8
256  ptr_h = c_loc(x)
257  class default
258  call neko_error('Unknown Fortran type')
259  end select
260 
261  call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
262 
263  end subroutine device_memcpy_r2
264 
266  subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
267  integer, intent(in) :: n
268  class(*), intent(inout), target :: x(:,:,:)
269  type(c_ptr), intent(inout) :: x_d
270  integer, intent(in), value :: dir
271  logical :: sync
272  type(c_ptr), optional :: strm
273  type(c_ptr) :: ptr_h, copy_stream
274  integer(c_size_t) :: s
275 
276  if (present(strm)) then
277  copy_stream = strm
278  else
279  copy_stream = glb_cmd_queue
280  end if
281 
282  select type(x)
283  type is (integer)
284  s = n * 4
285  ptr_h = c_loc(x)
286  type is (integer(i8))
287  s = n * 8
288  ptr_h = c_loc(x)
289  type is (real)
290  s = n * 4
291  ptr_h = c_loc(x)
292  type is (double precision)
293  s = n * 8
294  ptr_h = c_loc(x)
295  class default
296  call neko_error('Unknown Fortran type')
297  end select
298 
299  call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
300 
301  end subroutine device_memcpy_r3
302 
304  subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
305  integer, intent(in) :: n
306  class(*), intent(inout), target :: x(:,:,:,:)
307  type(c_ptr), intent(inout) :: x_d
308  integer, intent(in), value :: dir
309  logical :: sync
310  type(c_ptr), optional :: strm
311  type(c_ptr) :: ptr_h, copy_stream
312  integer(c_size_t) :: s
313 
314  if (present(strm)) then
315  copy_stream = strm
316  else
317  copy_stream = glb_cmd_queue
318  end if
319 
320  select type(x)
321  type is (integer)
322  s = n * 4
323  ptr_h = c_loc(x)
324  type is (integer(i8))
325  s = n * 8
326  ptr_h = c_loc(x)
327  type is (real)
328  s = n * 4
329  ptr_h = c_loc(x)
330  type is (double precision)
331  s = n * 8
332  ptr_h = c_loc(x)
333  class default
334  call neko_error('Unknown Fortran type')
335  end select
336 
337  call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
338 
339  end subroutine device_memcpy_r4
340 
344  subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
345  type(c_ptr), intent(inout) :: dst
346  type(c_ptr), intent(inout) :: src
347  integer(c_size_t), intent(in) :: s
348  integer, intent(in), value :: dir
349  logical, optional :: sync
350  type(c_ptr), optional :: strm
351  type(c_ptr) :: copy_stream
352  logical :: sync_device
353 
354  if (present(sync)) then
355  sync_device = sync
356  else
357  sync_device = .false.
358  end if
359 
360  if (present(strm)) then
361  copy_stream = strm
362  else
363  copy_stream = glb_cmd_queue
364  end if
365 
366  call device_memcpy_common(dst, src, s, dir, sync_device, copy_stream)
367 
368  end subroutine device_memcpy_cptr
369 
373  subroutine device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
374  type(c_ptr), intent(inout) :: ptr_h
375  type(c_ptr), intent(inout) :: x_d
376  integer(c_size_t), intent(in) :: s
377  integer, intent(in), value :: dir
378  logical, intent(in) :: sync_device
379  type(c_ptr), intent(inout) :: stream
380 #ifdef HAVE_HIP
381  if (dir .eq. host_to_device) then
382  if (hipmemcpyasync(x_d, ptr_h, s, &
383  hipmemcpyhosttodevice, stream) .ne. hipsuccess) then
384  call neko_error('Device memcpy async (host-to-device) failed')
385  end if
386  else if (dir .eq. device_to_host) then
387  if (hipmemcpyasync(ptr_h, x_d, s, &
388  hipmemcpydevicetohost, stream) .ne. hipsuccess) then
389  call neko_error('Device memcpy async (device-to-host) failed')
390  end if
391  else if (dir .eq. device_to_device) then
392  if (hipmemcpyasync(ptr_h, x_d, s, &
393  hipmemcpydevicetodevice, stream) .ne. hipsuccess) then
394  call neko_error('Device memcpy async (device-to-device) failed')
395  end if
396  else
397  call neko_error('Device memcpy failed (invalid direction')
398  end if
399  if (sync_device) then
400  call device_sync_stream(stream)
401  end if
402 #elif HAVE_CUDA
403  if (dir .eq. host_to_device) then
404  if (cudamemcpyasync(x_d, ptr_h, s, &
405  cudamemcpyhosttodevice, stream) .ne. cudasuccess) then
406  call neko_error('Device memcpy async (host-to-device) failed')
407  end if
408  else if (dir .eq. device_to_host) then
409  if (cudamemcpyasync(ptr_h, x_d, s, &
410  cudamemcpydevicetohost, stream) .ne. cudasuccess) then
411  call neko_error('Device memcpy async (device-to-host) failed')
412  end if
413  else if (dir .eq. device_to_device) then
414  if (cudamemcpyasync(ptr_h, x_d, s, &
415  cudamemcpydevicetodevice, stream) .ne. cudasuccess) then
416  call neko_error('Device memcpy async (device-to-device) failed')
417  end if
418  else
419  call neko_error('Device memcpy failed (invalid direction')
420  end if
421  if (sync_device) then
422  call device_sync_stream(stream)
423  end if
424 #elif HAVE_OPENCL
425  if (sync_device) then
426  if (dir .eq. host_to_device) then
427  if (clenqueuewritebuffer(glb_cmd_queue, x_d, cl_true, 0_i8, s, ptr_h, &
428  0, c_null_ptr, c_null_ptr) .ne. cl_success) then
429  call neko_error('Device memcpy (host-to-device) failed')
430  end if
431  else if (dir .eq. device_to_host) then
432  if (clenqueuereadbuffer(glb_cmd_queue, x_d, cl_true, 0_i8, s, ptr_h, &
433  0, c_null_ptr, c_null_ptr) .ne. cl_success) then
434  call neko_error('Device memcpy (device-to-host) failed')
435  end if
436  else if (dir .eq. device_to_device) then
437  if (clenqueuecopybuffer(glb_cmd_queue, x_d, ptr_h, 0_i8, 0_i8, s, &
438  0, c_null_ptr, c_null_ptr) .ne. cl_success) then
439  call neko_error('Device memcpy (device-to-device) failed')
440  end if
441  else
442  call neko_error('Device memcpy failed (invalid direction')
443  end if
444  else
445  if (dir .eq. host_to_device) then
446  if (clenqueuewritebuffer(glb_cmd_queue, x_d, cl_false, 0_i8, s, ptr_h, &
447  0, c_null_ptr, c_null_ptr) .ne. cl_success) then
448  call neko_error('Device memcpy (host-to-device) failed')
449  end if
450  else if (dir .eq. device_to_host) then
451  if (clenqueuereadbuffer(glb_cmd_queue, x_d, cl_false, 0_i8, s, ptr_h, &
452  0, c_null_ptr, c_null_ptr) .ne. cl_success) then
453  call neko_error('Device memcpy (device-to-host) failed')
454  end if
455  else if (dir .eq. device_to_device) then
456  if (clenqueuecopybuffer(glb_cmd_queue, x_d, ptr_h, 0_i8, 0_i8, s, &
457  0, c_null_ptr, c_null_ptr) .ne. cl_success) then
458  call neko_error('Device memcpy (device-to-device) failed')
459  end if
460  else
461  call neko_error('Device memcpy failed (invalid direction')
462  end if
463  end if
464 #endif
465  end subroutine device_memcpy_common
466 
468  subroutine device_associate_r1(x, x_d)
469  class(*), intent(inout), target :: x(:)
470  type(c_ptr), intent(inout) :: x_d
471  type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
472 
473  select type(x)
474  type is (integer)
475  htbl_ptr_h%ptr = c_loc(x)
476  type is (integer(i8))
477  htbl_ptr_h%ptr = c_loc(x)
478  type is (real)
479  htbl_ptr_h%ptr = c_loc(x)
480  type is (double precision)
481  htbl_ptr_h%ptr = c_loc(x)
482  class default
483  call neko_error('Unknown Fortran type')
484  end select
485 
486  htbl_ptr_d%ptr = x_d
487 
488  call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
489 
490  end subroutine device_associate_r1
491 
493  subroutine device_associate_r2(x, x_d)
494  class(*), intent(inout), target :: x(:,:)
495  type(c_ptr), intent(inout) :: x_d
496  type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
497 
498  select type(x)
499  type is (integer)
500  htbl_ptr_h%ptr = c_loc(x)
501  type is (integer(i8))
502  htbl_ptr_h%ptr = c_loc(x)
503  type is (real)
504  htbl_ptr_h%ptr = c_loc(x)
505  type is (double precision)
506  htbl_ptr_h%ptr = c_loc(x)
507  class default
508  call neko_error('Unknown Fortran type')
509  end select
510 
511  htbl_ptr_d%ptr = x_d
512 
513  call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
514 
515  end subroutine device_associate_r2
516 
518  subroutine device_associate_r3(x, x_d)
519  class(*), intent(inout), target :: x(:,:,:)
520  type(c_ptr), intent(inout) :: x_d
521  type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
522 
523  select type(x)
524  type is (integer)
525  htbl_ptr_h%ptr = c_loc(x)
526  type is (integer(i8))
527  htbl_ptr_h%ptr = c_loc(x)
528  type is (real)
529  htbl_ptr_h%ptr = c_loc(x)
530  type is (double precision)
531  htbl_ptr_h%ptr = c_loc(x)
532  class default
533  call neko_error('Unknown Fortran type')
534  end select
535 
536  htbl_ptr_d%ptr = x_d
537 
538  call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
539 
540  end subroutine device_associate_r3
541 
543  subroutine device_associate_r4(x, x_d)
544  class(*), intent(inout), target :: x(:,:,:,:)
545  type(c_ptr), intent(inout) :: x_d
546  type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
547 
548  select type(x)
549  type is (integer)
550  htbl_ptr_h%ptr = c_loc(x)
551  type is (integer(i8))
552  htbl_ptr_h%ptr = c_loc(x)
553  type is (real)
554  htbl_ptr_h%ptr = c_loc(x)
555  type is (double precision)
556  htbl_ptr_h%ptr = c_loc(x)
557  class default
558  call neko_error('Unknown Fortran type')
559  end select
560 
561  htbl_ptr_d%ptr = x_d
562 
563  call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
564 
565  end subroutine device_associate_r4
566 
568  subroutine device_deassociate_r1(x)
569  class(*), intent(inout), target :: x(:)
570  type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
571 
572  select type(x)
573  type is (integer)
574  htbl_ptr_h%ptr = c_loc(x)
575  type is (integer(i8))
576  htbl_ptr_h%ptr = c_loc(x)
577  type is (real)
578  htbl_ptr_h%ptr = c_loc(x)
579  type is (double precision)
580  htbl_ptr_h%ptr = c_loc(x)
581  class default
582  call neko_error('Unknown Fortran type')
583  end select
584 
585  if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
586  call device_addrtbl%remove(htbl_ptr_h)
587  end if
588 
589  end subroutine device_deassociate_r1
590 
592  subroutine device_deassociate_r2(x)
593  class(*), intent(inout), target :: x(:,:)
594  type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
595 
596  select type(x)
597  type is (integer)
598  htbl_ptr_h%ptr = c_loc(x)
599  type is (integer(i8))
600  htbl_ptr_h%ptr = c_loc(x)
601  type is (real)
602  htbl_ptr_h%ptr = c_loc(x)
603  type is (double precision)
604  htbl_ptr_h%ptr = c_loc(x)
605  class default
606  call neko_error('Unknown Fortran type')
607  end select
608 
609  if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
610  call device_addrtbl%remove(htbl_ptr_h)
611  end if
612 
613  end subroutine device_deassociate_r2
614 
616  subroutine device_deassociate_r3(x)
617  class(*), intent(inout), target :: x(:,:,:)
618  type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
619 
620  select type(x)
621  type is (integer)
622  htbl_ptr_h%ptr = c_loc(x)
623  type is (integer(i8))
624  htbl_ptr_h%ptr = c_loc(x)
625  type is (real)
626  htbl_ptr_h%ptr = c_loc(x)
627  type is (double precision)
628  htbl_ptr_h%ptr = c_loc(x)
629  class default
630  call neko_error('Unknown Fortran type')
631  end select
632 
633  if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
634  call device_addrtbl%remove(htbl_ptr_h)
635  end if
636 
637  end subroutine device_deassociate_r3
638 
640  subroutine device_deassociate_r4(x)
641  class(*), intent(inout), target :: x(:,:,:,:)
642  type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
643 
644  select type(x)
645  type is (integer)
646  htbl_ptr_h%ptr = c_loc(x)
647  type is (integer(i8))
648  htbl_ptr_h%ptr = c_loc(x)
649  type is (real)
650  htbl_ptr_h%ptr = c_loc(x)
651  type is (double precision)
652  htbl_ptr_h%ptr = c_loc(x)
653  class default
654  call neko_error('Unknown Fortran type')
655  end select
656 
657  if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
658  call device_addrtbl%remove(htbl_ptr_h)
659  end if
660 
661  end subroutine device_deassociate_r4
662 
664  subroutine device_map_r1(x, x_d, n)
665  integer, intent(in) :: n
666  class(*), intent(inout), target :: x(:)
667  type(c_ptr), intent(inout) :: x_d
668  integer(c_size_t) :: s
669 
670  if (c_associated(x_d)) then
671  call neko_error('Device pointer already associated')
672  end if
673 
674  select type(x)
675  type is (integer)
676  s = n * 4
677  type is (integer(i8))
678  s = n * 8
679  type is (real)
680  s = n * 4
681  type is (double precision)
682  s = n * 8
683  class default
684  call neko_error('Unknown Fortran type')
685  end select
686 
687  call device_alloc(x_d, s)
688  call device_associate(x, x_d)
689 
690  end subroutine device_map_r1
691 
693  subroutine device_map_r2(x, x_d, n)
694  integer, intent(in) :: n
695  class(*), intent(inout), target :: x(:,:)
696  type(c_ptr), intent(inout) :: x_d
697  integer(c_size_t) :: s
698 
699  if (c_associated(x_d)) then
700  call neko_error('Device pointer already associated')
701  end if
702 
703  select type(x)
704  type is (integer)
705  s = n * 4
706  type is (integer(i8))
707  s = n * 8
708  type is (real)
709  s = n * 4
710  type is (double precision)
711  s = n * 8
712  class default
713  call neko_error('Unknown Fortran type')
714  end select
715 
716  call device_alloc(x_d, s)
717  call device_associate(x, x_d)
718 
719  end subroutine device_map_r2
720 
722  subroutine device_map_r3(x, x_d, n)
723  integer, intent(in) :: n
724  class(*), intent(inout), target :: x(:,:,:)
725  type(c_ptr), intent(inout) :: x_d
726  integer(c_size_t) :: s
727 
728  if (c_associated(x_d)) then
729  call neko_error('Device pointer already associated')
730  end if
731 
732  select type(x)
733  type is (integer)
734  s = n * 4
735  type is (integer(i8))
736  s = n * 8
737  type is (real)
738  s = n * 4
739  type is (double precision)
740  s = n * 8
741  class default
742  call neko_error('Unknown Fortran type')
743  end select
744 
745  call device_alloc(x_d, s)
746  call device_associate(x, x_d)
747 
748  end subroutine device_map_r3
749 
751  subroutine device_map_r4(x, x_d, n)
752  integer, intent(in) :: n
753  class(*), intent(inout), target :: x(:,:,:,:)
754  type(c_ptr), intent(inout) :: x_d
755  integer(c_size_t) :: s
756 
757  if (c_associated(x_d)) then
758  call neko_error('Device pointer already associated')
759  end if
760 
761  select type(x)
762  type is (integer)
763  s = n * 4
764  type is (integer(i8))
765  s = n * 8
766  type is (real)
767  s = n * 4
768  type is (double precision)
769  s = n * 8
770  class default
771  call neko_error('Unknown Fortran type')
772  end select
773 
774  call device_alloc(x_d, s)
775  call device_associate(x, x_d)
776 
777  end subroutine device_map_r4
778 
780  function device_associated_r1(x) result(assoc)
781  class(*), intent(inout), target :: x(:)
782  type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
783  logical :: assoc
784 
785  select type(x)
786  type is (integer)
787  htbl_ptr_h%ptr = c_loc(x)
788  type is (integer(i8))
789  htbl_ptr_h%ptr = c_loc(x)
790  type is (real)
791  htbl_ptr_h%ptr = c_loc(x)
792  type is (double precision)
793  htbl_ptr_h%ptr = c_loc(x)
794  class default
795  call neko_error('Unknown Fortran type')
796  end select
797 
798  if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
799  assoc = .true.
800  else
801  assoc = .false.
802  end if
803 
804  end function device_associated_r1
805 
807  function device_associated_r2(x) result(assoc)
808  class(*), intent(inout), target :: x(:,:)
809  type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
810  logical :: assoc
811 
812  select type(x)
813  type is (integer)
814  htbl_ptr_h%ptr = c_loc(x)
815  type is (integer(i8))
816  htbl_ptr_h%ptr = c_loc(x)
817  type is (real)
818  htbl_ptr_h%ptr = c_loc(x)
819  type is (double precision)
820  htbl_ptr_h%ptr = c_loc(x)
821  class default
822  call neko_error('Unknown Fortran type')
823  end select
824 
825  if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
826  assoc = .true.
827  else
828  assoc = .false.
829  end if
830 
831  end function device_associated_r2
832 
834  function device_associated_r3(x) result(assoc)
835  class(*), intent(inout), target :: x(:,:,:)
836  type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
837  logical :: assoc
838 
839  select type(x)
840  type is (integer)
841  htbl_ptr_h%ptr = c_loc(x)
842  type is (integer(i8))
843  htbl_ptr_h%ptr = c_loc(x)
844  type is (real)
845  htbl_ptr_h%ptr = c_loc(x)
846  type is (double precision)
847  htbl_ptr_h%ptr = c_loc(x)
848  class default
849  call neko_error('Unknown Fortran type')
850  end select
851 
852  if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
853  assoc = .true.
854  else
855  assoc = .false.
856  end if
857 
858  end function device_associated_r3
859 
861  function device_associated_r4(x) result(assoc)
862  class(*), intent(inout), target :: x(:,:,:,:)
863  type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
864  logical :: assoc
865 
866  select type(x)
867  type is (integer)
868  htbl_ptr_h%ptr = c_loc(x)
869  type is (integer(i8))
870  htbl_ptr_h%ptr = c_loc(x)
871  type is (real)
872  htbl_ptr_h%ptr = c_loc(x)
873  type is (double precision)
874  htbl_ptr_h%ptr = c_loc(x)
875  class default
876  call neko_error('Unknown Fortran type')
877  end select
878 
879  if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
880  assoc = .true.
881  else
882  assoc = .false.
883  end if
884 
885  end function device_associated_r4
886 
888  function device_get_ptr_r1(x)
889  class(*), intent(in), target :: x(:)
890  type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
891  type(c_ptr) :: device_get_ptr_r1
892 
893  device_get_ptr_r1 = c_null_ptr
894 
895  select type(x)
896  type is (integer)
897  htbl_ptr_h%ptr = c_loc(x)
898  type is (integer(i8))
899  htbl_ptr_h%ptr = c_loc(x)
900  type is (real)
901  htbl_ptr_h%ptr = c_loc(x)
902  type is (double precision)
903  htbl_ptr_h%ptr = c_loc(x)
904  class default
905  call neko_error('Unknown Fortran type')
906  end select
907 
908  if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
909  device_get_ptr_r1 = htbl_ptr_d%ptr
910  else
911  call neko_error('Array not associated with device')
912  end if
913  end function device_get_ptr_r1
914 
916  function device_get_ptr_r2(x)
917  class(*), intent(in), target :: x(:,:)
918  type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
919  type(c_ptr) :: device_get_ptr_r2
920 
921  device_get_ptr_r2 = c_null_ptr
922 
923  select type(x)
924  type is (integer)
925  htbl_ptr_h%ptr = c_loc(x)
926  type is (integer(i8))
927  htbl_ptr_h%ptr = c_loc(x)
928  type is (real)
929  htbl_ptr_h%ptr = c_loc(x)
930  type is (double precision)
931  htbl_ptr_h%ptr = c_loc(x)
932  class default
933  call neko_error('Unknown Fortran type')
934  end select
935 
936  if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
937  device_get_ptr_r2 = htbl_ptr_d%ptr
938  else
939  call neko_error('Array not associated with device')
940  end if
941  end function device_get_ptr_r2
942 
944  function device_get_ptr_r3(x)
945  class(*), intent(in), target :: x(:,:,:)
946  type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
947  type(c_ptr) :: device_get_ptr_r3
948 
949  device_get_ptr_r3 = c_null_ptr
950 
951  select type(x)
952  type is (integer)
953  htbl_ptr_h%ptr = c_loc(x)
954  type is (integer(i8))
955  htbl_ptr_h%ptr = c_loc(x)
956  type is (real)
957  htbl_ptr_h%ptr = c_loc(x)
958  type is (double precision)
959  htbl_ptr_h%ptr = c_loc(x)
960  class default
961  call neko_error('Unknown Fortran type')
962  end select
963 
964  if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
965  device_get_ptr_r3 = htbl_ptr_d%ptr
966  else
967  call neko_error('Array not associated with device')
968  end if
969  end function device_get_ptr_r3
970 
972  function device_get_ptr_r4(x)
973  class(*), intent(in), target :: x(:,:,:,:)
974  type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
975  type(c_ptr) :: device_get_ptr_r4
976 
977  device_get_ptr_r4 = c_null_ptr
978 
979  select type(x)
980  type is (integer)
981  htbl_ptr_h%ptr = c_loc(x)
982  type is (integer(i8))
983  htbl_ptr_h%ptr = c_loc(x)
984  type is (real)
985  htbl_ptr_h%ptr = c_loc(x)
986  type is (double precision)
987  htbl_ptr_h%ptr = c_loc(x)
988  class default
989  call neko_error('Unknown Fortran type')
990  end select
991 
992  if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
993  device_get_ptr_r4 = htbl_ptr_d%ptr
994  else
995  call neko_error('Array not associated with device')
996  end if
997  end function device_get_ptr_r4
998 
1000  subroutine device_sync_device()
1001 #ifdef HAVE_HIP
1002  if (hipdevicesynchronize() .ne. hipsuccess) then
1003  call neko_error('Error during device sync')
1004  end if
1005 #elif HAVE_CUDA
1006  if (cudadevicesynchronize() .ne. cudasuccess) then
1007  call neko_error('Error during device sync')
1008  end if
1009 #elif HAVE_OPENCL
1010  if (clfinish(glb_cmd_queue) .ne. cl_success) then
1011  call neko_error('Error during device sync')
1012  end if
1013 #endif
1014  end subroutine device_sync_device
1015 
1017  subroutine device_sync_stream(stream)
1018  type(c_ptr), intent(in) :: stream
1019 #ifdef HAVE_HIP
1020  if (hipstreamsynchronize(stream) .ne. hipsuccess) then
1021  call neko_error('Error during stream sync')
1022  end if
1023 #elif HAVE_CUDA
1024  if (cudastreamsynchronize(stream) .ne. cudasuccess) then
1025  call neko_error('Error during stream sync')
1026  end if
1027 #elif HAVE_OPENCL
1028  if (clfinish(stream) .ne. cl_success) then
1029  call neko_error('Error during stream sync')
1030  end if
1031 #endif
1032  end subroutine device_sync_stream
1033 
1035  subroutine device_stream_create(stream, flags)
1036  type(c_ptr), intent(inout) :: stream
1037  integer, optional :: flags
1038  integer :: ierr
1039 #ifdef HAVE_HIP
1040  if (present(flags)) then
1041  if (hipstreamcreatewithflags(stream, flags) .ne. hipsuccess) then
1042  call neko_error('Error during stream create (w. flags)')
1043  end if
1044  else
1045  if (hipstreamcreate(stream) .ne. hipsuccess) then
1046  call neko_error('Error during stream create')
1047  end if
1048  end if
1049 #elif HAVE_CUDA
1050  if (present(flags)) then
1051  if (cudastreamcreatewithflags(stream, flags) .ne. cudasuccess) then
1052  call neko_error('Error during stream create (w. flags)')
1053  end if
1054  else
1055  if (cudastreamcreate(stream) .ne. cudasuccess) then
1056  call neko_error('Error during stream create')
1057  end if
1058  end if
1059 #elif HAVE_OPENCL
1060  stream = clcreatecommandqueue(glb_ctx, glb_device_id, 0_i8, ierr)
1061  if (ierr .ne. cl_success) then
1062  call neko_error('Error during stream create')
1063  end if
1064 #endif
1065  end subroutine device_stream_create
1066 
1068  subroutine device_stream_create_with_priority(stream, flags, prio)
1069  type(c_ptr), intent(inout) :: stream
1070  integer, intent(in) :: flags, prio
1071 #ifdef HAVE_HIP
1072  if (hipstreamcreatewithpriority(stream, flags, prio) .ne. hipsuccess) then
1073  call neko_error('Error during stream create (w. priority)')
1074  end if
1075 #elif HAVE_CUDA
1076  if (cudastreamcreatewithpriority(stream, flags, prio) .ne. cudasuccess) then
1077  call neko_error('Error during stream create (w. priority)')
1078  end if
1079 #elif HAVE_OPENCL
1080  call neko_error('Not implemented yet')
1081 #endif
1082  end subroutine device_stream_create_with_priority
1083 
1085  subroutine device_stream_destroy(stream)
1086  type(c_ptr), intent(inout) :: stream
1087 #ifdef HAVE_HIP
1088  if (hipstreamdestroy(stream) .ne. hipsuccess) then
1089  call neko_error('Error during stream destroy')
1090  end if
1091 #elif HAVE_CUDA
1092  if (cudastreamdestroy(stream) .ne. cudasuccess) then
1093  call neko_error('Error during stream destroy')
1094  end if
1095 #elif HAVE_OPENCL
1096  if (clreleasecommandqueue(stream) .ne. cl_success) then
1097  call neko_error('Error during stream destroy')
1098  end if
1099 #endif
1100  end subroutine device_stream_destroy
1101 
1103  subroutine device_stream_wait_event(stream, event, flags)
1104  type(c_ptr), intent(in) :: stream
1105  type(c_ptr), intent(in) :: event
1106  integer :: flags
1107 #ifdef HAVE_HIP
1108  if (hipstreamwaitevent(stream, event, flags) .ne. hipsuccess) then
1109  call neko_error('Error during stream sync')
1110  end if
1111 #elif HAVE_CUDA
1112  if (cudastreamwaitevent(stream, event, flags) .ne. cudasuccess) then
1113  call neko_error('Error during stream sync')
1114  end if
1115 #elif HAVE_OPENCL
1116 ! if (clEnqueueWaitForEvents(stream, 1, event) .ne. CL_SUCCESS) then
1117 ! call neko_error('Error during stream sync')
1118 ! end if
1119 #endif
1120  end subroutine device_stream_wait_event
1121 
1124 #if HAVE_CUDA
1125  if (cudaprofilerstart() .ne. cudasuccess) then
1126  call neko_error('Error starting profiler')
1127  end if
1128 #endif
1129  end subroutine device_profiler_start
1130 
1133 #if HAVE_CUDA
1134  if (cudaprofilerstop() .ne. cudasuccess) then
1135  call neko_error('Error stopping profiler')
1136  end if
1137 #endif
1138  end subroutine device_profiler_stop
1139 
1141  subroutine device_event_create(event, flags)
1142  type(c_ptr), intent(inout) :: event
1143  integer, optional :: flags
1144  integer :: ierr
1145 #ifdef HAVE_HIP
1146  if (present(flags)) then
1147  if (hipeventcreatewithflags(event, flags) .ne. hipsuccess) then
1148  call neko_error('Error during event create (w. flags)')
1149  end if
1150  else
1151  if (hipeventcreate(event) .ne. hipsuccess) then
1152  call neko_error('Error during event create')
1153  end if
1154  end if
1155 #elif HAVE_CUDA
1156  if (present(flags)) then
1157  if (cudaeventcreatewithflags(event, flags) .ne. cudasuccess) then
1158  call neko_error('Error during event create (w. flags)')
1159  end if
1160  else
1161  if (cudaeventcreate(event) .ne. cudasuccess) then
1162  call neko_error('Error during event create')
1163  end if
1164  end if
1165 #elif HAVE_OPENCL
1166  event = c_null_ptr
1167 #endif
1168  end subroutine device_event_create
1169 
1171  subroutine device_event_destroy(event)
1172  type(c_ptr), intent(inout) :: event
1173 #ifdef HAVE_HIP
1174  if (hipeventdestroy(event) .ne. hipsuccess) then
1175  call neko_error('Error during event destroy')
1176  end if
1177 #elif HAVE_CUDA
1178  if (cudaeventdestroy(event) .ne. cudasuccess) then
1179  call neko_error('Error during event destroy')
1180  end if
1181 #elif HAVE_OPENCL
1182  if (clreleaseevent(event) .ne. cl_success) then
1183  call neko_error('Error during event destroy')
1184  end if
1185 #endif
1186  end subroutine device_event_destroy
1187 
1189  subroutine device_event_record(event, stream)
1190  type(c_ptr), intent(in) :: event
1191  type(c_ptr), intent(in) :: stream
1192 #ifdef HAVE_HIP
1193  if (hipeventrecord(event, stream) .ne. hipsuccess) then
1194  call neko_error('Error recording an event')
1195  end if
1196 #elif HAVE_CUDA
1197  if (cudaeventrecord(event, stream) .ne. cudasuccess) then
1198  call neko_error('Error recording an event')
1199  end if
1200 #elif HAVE_OPENCL
1201  if (clenqueuemarkerwithwaitlist(stream, 0, c_null_ptr, event) .ne. cl_success) then
1202  call neko_error('Error recording an event')
1203  end if
1204 #endif
1205  end subroutine device_event_record
1206 
1208  subroutine device_event_sync(event)
1209  type(c_ptr), intent(in) :: event
1210 #ifdef HAVE_HIP
1211  if (hipeventsynchronize(event) .ne. hipsuccess) then
1212  call neko_error('Error during event sync')
1213  end if
1214 #elif HAVE_CUDA
1215  if (cudaeventsynchronize(event) .ne. cudasuccess) then
1216  call neko_error('Error during event sync')
1217  end if
1218 #elif HAVE_OPENCL
1219  if (clwaitforevents(1, event) .ne. cl_success) then
1220  call neko_error('Error during event sync')
1221  end if
1222 #endif
1223  end subroutine device_event_sync
1224 
1225 
1226 end module device
double real
Definition: device_config.h:12
Associate a Fortran array to a (allocated) device pointer.
Definition: device.F90:63
Check if a Fortran array is assoicated with a device pointer.
Definition: device.F90:69
Deassociate a Fortran array from a device pointer.
Definition: device.F90:75
Return the device pointer for an associated Fortran array.
Definition: device.F90:81
Map a Fortran array to a device (allocate and associate)
Definition: device.F90:57
Copy data between host and device (or device and device)
Definition: device.F90:51
Synchronize a device or stream.
Definition: device.F90:87
Fortran CUDA interface.
Definition: cuda_intf.F90:34
subroutine cuda_device_name(name)
Definition: cuda_intf.F90:328
subroutine cuda_finalize
Definition: cuda_intf.F90:318
@ cudamemcpydevicetohost
Definition: cuda_intf.F90:66
@ cudamemcpydevicetodevice
Definition: cuda_intf.F90:67
@ cudamemcpyhosttodevice
Definition: cuda_intf.F90:65
subroutine cuda_init
Definition: cuda_intf.F90:279
type(c_ptr), bind(C) glb_cmd_queue
Global HIP command queue.
Definition: cuda_intf.F90:43
Device abstraction, common interface for various accelerators.
Definition: device.F90:34
subroutine, public device_event_record(event, stream)
Record a device event.
Definition: device.F90:1190
subroutine, public device_event_sync(event)
Synchronize an event.
Definition: device.F90:1209
subroutine device_associate_r2(x, x_d)
Associate a Fortran rank 2 array to a (allocated) device pointer.
Definition: device.F90:494
subroutine, public device_finalize
Definition: device.F90:122
integer, parameter, public device_to_device
Definition: device.F90:47
type(c_ptr) function device_get_ptr_r4(x)
Return the device pointer for an associated Fortran rank 4 array.
Definition: device.F90:973
type(c_ptr) function device_get_ptr_r1(x)
Return the device pointer for an associated Fortran rank 1 array.
Definition: device.F90:889
integer, parameter, public host_to_device
Definition: device.F90:47
subroutine device_map_r3(x, x_d, n)
Map a Fortran rank 3 array to a device (allocate and associate)
Definition: device.F90:723
subroutine, private device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
Copy data between host and device.
Definition: device.F90:374
logical function device_associated_r3(x)
Check if a Fortran rank 3 array is assoicated with a device pointer.
Definition: device.F90:835
type(htable_cptr_t), private device_addrtbl
Table of host to device address mappings.
Definition: device.F90:92
subroutine, public device_profiler_stop()
Stop device profiling.
Definition: device.F90:1133
subroutine device_deassociate_r3(x)
Deassociate a Fortran rank 3 array from a device pointer.
Definition: device.F90:617
subroutine, public device_sync_stream(stream)
Synchronize a device stream.
Definition: device.F90:1018
type(c_ptr) function device_get_ptr_r3(x)
Return the device pointer for an associated Fortran rank 3 array.
Definition: device.F90:945
subroutine, public device_profiler_start()
Start device profiling.
Definition: device.F90:1124
subroutine device_map_r2(x, x_d, n)
Map a Fortran rank 2 array to a device (allocate and associate)
Definition: device.F90:694
subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 2 arrays)
Definition: device.F90:229
subroutine device_map_r4(x, x_d, n)
Map a Fortran rank 4 array to a device (allocate and associate)
Definition: device.F90:752
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition: device.F90:172
integer, parameter, public device_to_host
Definition: device.F90:47
subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
Copy data between host and device (or device and device) (c-pointers)
Definition: device.F90:345
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition: device.F90:1172
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition: device.F90:151
subroutine, public device_stream_create(stream, flags)
Create a device stream/command queue.
Definition: device.F90:1036
subroutine device_deassociate_r4(x)
Deassociate a Fortran rank 4 array from a device pointer.
Definition: device.F90:641
subroutine device_sync_device()
Synchronize the device.
Definition: device.F90:1001
subroutine device_associate_r1(x, x_d)
Associate a Fortran rank 1 array to a (allocated) device pointer.
Definition: device.F90:469
subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 4 arrays)
Definition: device.F90:305
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
Definition: device.F90:1104
subroutine device_map_r1(x, x_d, n)
Map a Fortran rank 1 array to a device (allocate and associate)
Definition: device.F90:665
subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 1 arrays)
Definition: device.F90:191
subroutine device_stream_create_with_priority(stream, flags, prio)
Create a device stream/command queue with priority.
Definition: device.F90:1069
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition: device.F90:1142
subroutine, public device_name(name)
Definition: device.F90:138
logical function device_associated_r4(x)
Check if a Fortran rank 4 array is assoicated with a device pointer.
Definition: device.F90:862
logical function device_associated_r2(x)
Check if a Fortran rank 2 array is assoicated with a device pointer.
Definition: device.F90:808
type(c_ptr) function device_get_ptr_r2(x)
Return the device pointer for an associated Fortran rank 2 array.
Definition: device.F90:917
subroutine device_associate_r4(x, x_d)
Associate a Fortran rank 4 array to a (allocated) device pointer.
Definition: device.F90:544
subroutine device_deassociate_r1(x)
Deassociate a Fortran rank 1 array from a device pointer.
Definition: device.F90:569
subroutine device_deassociate_r2(x)
Deassociate a Fortran rank 2 array from a device pointer.
Definition: device.F90:593
subroutine, public device_init
Definition: device.F90:107
subroutine device_associate_r3(x, x_d)
Associate a Fortran rank 3 array to a (allocated) device pointer.
Definition: device.F90:519
logical function device_associated_r1(x)
Check if a Fortran rank 1 array is assoicated with a device pointer.
Definition: device.F90:781
subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 3 arrays)
Definition: device.F90:267
subroutine, public device_stream_destroy(stream)
Destroy a device stream/command queue.
Definition: device.F90:1086
Dummy device interface.
Fortran HIP interface.
Definition: hip_intf.F90:34
subroutine hip_device_name(name)
Definition: hip_intf.F90:292
subroutine hip_init
Definition: hip_intf.F90:264
@ hipmemcpydevicetohost
Definition: hip_intf.F90:84
@ hipmemcpydevicetodevice
Definition: hip_intf.F90:85
@ hipmemcpyhosttodevice
Definition: hip_intf.F90:83
subroutine hip_finalize
Definition: hip_intf.F90:282
@ hipsuccess
Definition: hip_intf.F90:55
Implements a hash table ADT.
Definition: htable.f90:36
integer, parameter, public i8
Definition: num_types.f90:7
Fortran OpenCL interface.
Definition: opencl_intf.F90:34
subroutine opencl_device_name(name)
subroutine opencl_init
subroutine opencl_finalize
OpenCL JIT program library.
Definition: prgm_lib.F90:2
subroutine opencl_prgm_lib_release
Definition: prgm_lib.F90:88
Utilities.
Definition: utils.f90:35
C pointer based hash table.
Definition: htable.f90:142