Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
device.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2025, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module device
35 use num_types, only : i8
36 use opencl_intf
37 use cuda_intf
38 use hip_intf
40 use htable, only : htable_cptr_t, h_cptr_t
41 use utils, only : neko_error
43 use, intrinsic :: iso_c_binding
44 implicit none
45 private
46
47 integer, public, parameter :: host_to_device = 1, device_to_host = 2, &
49
51 type(c_ptr), public, bind(c) :: glb_cmd_queue = c_null_ptr
52
54 type(c_ptr), public, bind(c) :: aux_cmd_queue = c_null_ptr
55
56#ifdef HAVE_OPENCL
57
58 type(c_ptr), public, bind(c) :: prf_cmd_queue = c_null_ptr
59#endif
60
62 type(c_ptr), public, bind(c) :: glb_cmd_event
63
65 integer, public :: strm_high_prio
66
68 integer, public :: strm_low_prio
69
74 end interface device_memcpy
75
77 interface device_map
78 module procedure device_map_r1, device_map_r2, &
80 end interface device_map
81
86 end interface device_unmap
87
92 end interface device_associate
93
98 end interface device_associated
99
104 end interface device_deassociate
105
110 end interface device_get_ptr
111
113 interface device_sync
114 module procedure device_sync_device, device_sync_stream
115 end interface device_sync
116
119
128
129 private :: device_memcpy_common
130
131contains
132
133 subroutine device_init
134#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
135 call device_addrtbl%init(64)
136
137#ifdef HAVE_HIP
139#elif HAVE_CUDA
141#elif HAVE_OPENCL
143#endif
145#endif
146
147 ! Check the device count against the number of MPI ranks
148 if (neko_bcknd_device .eq. 1) then
149 if (device_count() .ne. 1) then
150 call neko_error('Only one device is supported per MPI rank')
151 end if
152 end if
153 end subroutine device_init
154
156#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
157 call device_addrtbl%free()
158
159#ifdef HAVE_HIP
161#elif HAVE_CUDA
163#elif HAVE_OPENCL
166#endif
168#endif
169 end subroutine device_finalize
170
171 subroutine device_name(name)
172 character(len=*), intent(inout) :: name
173
174#ifdef HAVE_HIP
175 call hip_device_name(name)
176#elif HAVE_CUDA
177 call cuda_device_name(name)
178#elif HAVE_OPENCL
179 call opencl_device_name(name)
180#endif
181 end subroutine device_name
182
184 integer function device_count()
185#ifdef HAVE_HIP
187#elif HAVE_CUDA
189#elif HAVE_OPENCL
191#else
192 device_count = 0
193#endif
194 end function device_count
195
197 subroutine device_alloc(x_d, s)
198 type(c_ptr), intent(inout) :: x_d
199 integer(c_size_t) :: s
200 integer :: ierr
201
202 if (s .eq. 0) then
203 call device_sync()
204 x_d = c_null_ptr
205 return
206 end if
207#ifdef HAVE_HIP
208 if (hipmalloc(x_d, s) .ne. hipsuccess) then
209 call neko_error('Memory allocation on device failed')
210 end if
211#elif HAVE_CUDA
212 if (cudamalloc(x_d, s) .ne. cudasuccess) then
213 call neko_error('Memory allocation on device failed')
214 end if
215#elif HAVE_OPENCL
216 x_d = clcreatebuffer(glb_ctx, cl_mem_read_write, s, c_null_ptr, ierr)
217 if (ierr .ne. cl_success) then
218 call neko_error('Memory allocation on device failed')
219 end if
220#endif
221 end subroutine device_alloc
222
224 subroutine device_free(x_d)
225 type(c_ptr), intent(inout) :: x_d
226#ifdef HAVE_HIP
227 if (hipfree(x_d) .ne. hipsuccess) then
228 call neko_error('Memory deallocation on device failed')
229 end if
230#elif HAVE_CUDA
231 if (cudafree(x_d) .ne. cudasuccess) then
232 call neko_error('Memory deallocation on device failed')
233 end if
234#elif HAVE_OPENCL
235 if (clreleasememobject(x_d) .ne. cl_success) then
236 call neko_error('Memory deallocation on device failed')
237 end if
238#endif
239 x_d = c_null_ptr
240 end subroutine device_free
241
243 subroutine device_memset(x_d, v, s, sync, strm)
244 type(c_ptr), intent(inout) :: x_d
245 integer(c_int), target, value :: v
246 integer(c_size_t), intent(in) :: s
247 logical, optional :: sync
248 type(c_ptr), optional :: strm
249 type(c_ptr) :: stream
250 logical :: sync_device
251
252 if (present(sync)) then
253 sync_device = sync
254 else
255 sync_device = .false.
256 end if
257
258 if (present(strm)) then
259 stream = strm
260 else
261 stream = glb_cmd_queue
262 end if
263
264#ifdef HAVE_HIP
265 if (hipmemsetasync(x_d, v, s, stream) .ne. hipsuccess) then
266 call neko_error('Device memset async failed')
267 end if
268#elif HAVE_CUDA
269 if (cudamemsetasync(x_d, v, s, stream) .ne. cudasuccess) then
270 call neko_error('Device memset async failed')
271 end if
272#elif HAVE_OPENCL
273 if (clenqueuefillbuffer(stream, x_d, c_loc(v), c_sizeof(v), 0_i8, &
274 s, 0, c_null_ptr, c_null_ptr) .ne. cl_success) then
275 call neko_error('Device memset async failed')
276 end if
277#endif
278
279 if (sync_device) then
280 call device_sync_stream(stream)
281 end if
282
283 end subroutine device_memset
284
286 subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
287 integer, intent(in) :: n
288 class(*), intent(inout), target :: x(:)
289 type(c_ptr), intent(inout) :: x_d
290 integer, intent(in), value :: dir
291 logical :: sync
292 type(c_ptr), optional :: strm
293 type(c_ptr) :: ptr_h, copy_stream
294 integer(c_size_t) :: s
295
296 if (present(strm)) then
297 copy_stream = strm
298 else
299 copy_stream = glb_cmd_queue
300 end if
301
302 select type (x)
303 type is (integer)
304 s = n * int(4, c_size_t)
305 ptr_h = c_loc(x)
306 type is (integer(i8))
307 s = n * int(8, c_size_t)
308 ptr_h = c_loc(x)
309 type is (real)
310 s = n * int(4, c_size_t)
311 ptr_h = c_loc(x)
312 type is (double precision)
313 s = n * int(8, c_size_t)
314 ptr_h = c_loc(x)
315 class default
316 call neko_error('Unknown Fortran type')
317 end select
318
319 call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
320
321 end subroutine device_memcpy_r1
322
324 subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
325 integer, intent(in) :: n
326 class(*), intent(inout), target :: x(:,:)
327 type(c_ptr), intent(inout) :: x_d
328 integer, intent(in), value :: dir
329 logical :: sync
330 type(c_ptr), optional :: strm
331 type(c_ptr) :: ptr_h, copy_stream
332 integer(c_size_t) :: s
333
334 if (present(strm)) then
335 copy_stream = strm
336 else
337 copy_stream = glb_cmd_queue
338 end if
339
340 select type (x)
341 type is (integer)
342 s = n * int(4, c_size_t)
343 ptr_h = c_loc(x)
344 type is (integer(i8))
345 s = n * int(8, c_size_t)
346 ptr_h = c_loc(x)
347 type is (real)
348 s = n * int(4, c_size_t)
349 ptr_h = c_loc(x)
350 type is (double precision)
351 s = n * int(8, c_size_t)
352 ptr_h = c_loc(x)
353 class default
354 call neko_error('Unknown Fortran type')
355 end select
356
357 call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
358
359 end subroutine device_memcpy_r2
360
362 subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
363 integer, intent(in) :: n
364 class(*), intent(inout), target :: x(:,:,:)
365 type(c_ptr), intent(inout) :: x_d
366 integer, intent(in), value :: dir
367 logical :: sync
368 type(c_ptr), optional :: strm
369 type(c_ptr) :: ptr_h, copy_stream
370 integer(c_size_t) :: s
371
372 if (present(strm)) then
373 copy_stream = strm
374 else
375 copy_stream = glb_cmd_queue
376 end if
377
378 select type (x)
379 type is (integer)
380 s = n * int(4, c_size_t)
381 ptr_h = c_loc(x)
382 type is (integer(i8))
383 s = n * int(8, c_size_t)
384 ptr_h = c_loc(x)
385 type is (real)
386 s = n * int(4, c_size_t)
387 ptr_h = c_loc(x)
388 type is (double precision)
389 s = n * int(8, c_size_t)
390 ptr_h = c_loc(x)
391 class default
392 call neko_error('Unknown Fortran type')
393 end select
394
395 call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
396
397 end subroutine device_memcpy_r3
398
400 subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
401 integer, intent(in) :: n
402 class(*), intent(inout), target :: x(:,:,:,:)
403 type(c_ptr), intent(inout) :: x_d
404 integer, intent(in), value :: dir
405 logical :: sync
406 type(c_ptr), optional :: strm
407 type(c_ptr) :: ptr_h, copy_stream
408 integer(c_size_t) :: s
409
410 if (present(strm)) then
411 copy_stream = strm
412 else
413 copy_stream = glb_cmd_queue
414 end if
415
416 select type (x)
417 type is (integer)
418 s = n * int(4, c_size_t)
419 ptr_h = c_loc(x)
420 type is (integer(i8))
421 s = n * int(8, c_size_t)
422 ptr_h = c_loc(x)
423 type is (real)
424 s = n * int(4, c_size_t)
425 ptr_h = c_loc(x)
426 type is (double precision)
427 s = n * int(8, c_size_t)
428 ptr_h = c_loc(x)
429 class default
430 call neko_error('Unknown Fortran type')
431 end select
432
433 call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
434
435 end subroutine device_memcpy_r4
436
440 subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
441 type(c_ptr), intent(inout) :: dst
442 type(c_ptr), intent(inout) :: src
443 integer(c_size_t), intent(in) :: s
444 integer, intent(in), value :: dir
445 logical, optional :: sync
446 type(c_ptr), optional :: strm
447 type(c_ptr) :: copy_stream
448 logical :: sync_device
449
450 if (present(sync)) then
451 sync_device = sync
452 else
453 sync_device = .false.
454 end if
455
456 if (present(strm)) then
457 copy_stream = strm
458 else
459 copy_stream = glb_cmd_queue
460 end if
461
462 call device_memcpy_common(dst, src, s, dir, sync_device, copy_stream)
463
464 end subroutine device_memcpy_cptr
465
469 subroutine device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
470 type(c_ptr), intent(inout) :: ptr_h
471 type(c_ptr), intent(inout) :: x_d
472 integer(c_size_t), intent(in) :: s
473 integer, intent(in), value :: dir
474 logical, intent(in) :: sync_device
475 type(c_ptr), intent(inout) :: stream
476
477 if (s .eq. 0) then
478 if (sync_device) then
479 call device_sync_stream(stream)
480 end if
481 return
482 end if
483
484#ifdef HAVE_HIP
485 if (dir .eq. host_to_device) then
486 if (hipmemcpyasync(x_d, ptr_h, s, &
487 hipmemcpyhosttodevice, stream) .ne. hipsuccess) then
488 call neko_error('Device memcpy async (host-to-device) failed')
489 end if
490 else if (dir .eq. device_to_host) then
491 if (hipmemcpyasync(ptr_h, x_d, s, &
492 hipmemcpydevicetohost, stream) .ne. hipsuccess) then
493 call neko_error('Device memcpy async (device-to-host) failed')
494 end if
495 else if (dir .eq. device_to_device) then
496 if (hipmemcpyasync(ptr_h, x_d, s, hipmemcpydevicetodevice, stream) &
497 .ne. hipsuccess) then
498 call neko_error('Device memcpy async (device-to-device) failed')
499 end if
500 else
501 call neko_error('Device memcpy failed (invalid direction')
502 end if
503 if (sync_device) then
504 call device_sync_stream(stream)
505 end if
506#elif HAVE_CUDA
507 if (dir .eq. host_to_device) then
508 if (cudamemcpyasync(x_d, ptr_h, s, cudamemcpyhosttodevice, stream) &
509 .ne. cudasuccess) then
510 call neko_error('Device memcpy async (host-to-device) failed')
511 end if
512 else if (dir .eq. device_to_host) then
513 if (cudamemcpyasync(ptr_h, x_d, s, cudamemcpydevicetohost, stream) &
514 .ne. cudasuccess) then
515 call neko_error('Device memcpy async (device-to-host) failed')
516 end if
517 else if (dir .eq. device_to_device) then
518 if (cudamemcpyasync(ptr_h, x_d, s, cudamemcpydevicetodevice, stream) &
519 .ne. cudasuccess) then
520 call neko_error('Device memcpy async (device-to-device) failed')
521 end if
522 else
523 call neko_error('Device memcpy failed (invalid direction')
524 end if
525 if (sync_device) then
526 call device_sync_stream(stream)
527 end if
528#elif HAVE_OPENCL
529 if (sync_device) then
530 if (dir .eq. host_to_device) then
531 if (clenqueuewritebuffer(stream, x_d, cl_true, 0_i8, s, &
532 ptr_h, 0, c_null_ptr, c_null_ptr) &
533 .ne. cl_success) then
534 call neko_error('Device memcpy (host-to-device) failed')
535 end if
536 else if (dir .eq. device_to_host) then
537 if (clenqueuereadbuffer(stream, x_d, cl_true, 0_i8, s, ptr_h, &
538 0, c_null_ptr, c_null_ptr) &
539 .ne. cl_success) then
540 call neko_error('Device memcpy (device-to-host) failed')
541 end if
542 else if (dir .eq. device_to_device) then
543 if (clenqueuecopybuffer(stream, x_d, ptr_h, 0_i8, 0_i8, s, &
544 0, c_null_ptr, c_null_ptr) &
545 .ne. cl_success) then
546 call neko_error('Device memcpy (device-to-device) failed')
547 end if
548 else
549 call neko_error('Device memcpy failed (invalid direction')
550 end if
551 else
552 if (dir .eq. host_to_device) then
553 if (clenqueuewritebuffer(stream, x_d, cl_false, 0_i8, s, &
554 ptr_h, 0, c_null_ptr, c_null_ptr) &
555 .ne. cl_success) then
556 call neko_error('Device memcpy (host-to-device) failed')
557 end if
558 else if (dir .eq. device_to_host) then
559 if (clenqueuereadbuffer(stream, x_d, cl_false, 0_i8, s, ptr_h,&
560 0, c_null_ptr, c_null_ptr) &
561 .ne. cl_success) then
562 call neko_error('Device memcpy (device-to-host) failed')
563 end if
564 else if (dir .eq. device_to_device) then
565 if (clenqueuecopybuffer(stream, x_d, ptr_h, 0_i8, 0_i8, s, &
566 0, c_null_ptr, c_null_ptr) &
567 .ne. cl_success) then
568 call neko_error('Device memcpy (device-to-device) failed')
569 end if
570 else
571 call neko_error('Device memcpy failed (invalid direction')
572 end if
573 end if
574#endif
575 end subroutine device_memcpy_common
576
578 subroutine device_associate_r1(x, x_d, n)
579 class(*), intent(inout), target :: x(:)
580 type(c_ptr), intent(inout) :: x_d
581 integer, intent(in), optional :: n
582 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
583 integer :: n_
584
585 if (present(n)) then
586 n_ = n
587 else
588 n_ = size(x)
589 end if
590
591 if (n_ .eq. 0) return
592 if (.not. c_associated(x_d)) call neko_error('Attempting to associate' // &
593 ' to a null device pointer for a non-empty array')
594
595 select type (x)
596 type is (integer)
597 htbl_ptr_h%ptr = c_loc(x)
598 type is (integer(i8))
599 htbl_ptr_h%ptr = c_loc(x)
600 type is (real)
601 htbl_ptr_h%ptr = c_loc(x)
602 type is (double precision)
603 htbl_ptr_h%ptr = c_loc(x)
604 class default
605 call neko_error('Unknown Fortran type')
606 end select
607
608 htbl_ptr_d%ptr = x_d
609
610 call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
611
612 end subroutine device_associate_r1
613
615 subroutine device_associate_r2(x, x_d, n)
616 class(*), intent(inout), target :: x(:,:)
617 type(c_ptr), intent(inout) :: x_d
618 integer, intent(in), optional :: n
619 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
620 integer :: n_
621
622 if (present(n)) then
623 n_ = n
624 else
625 n_ = size(x)
626 end if
627
628 if (n_ .eq. 0) return
629 if (.not. c_associated(x_d)) call neko_error('Attempting to associate' // &
630 ' to a null device pointer for a non-empty array')
631
632 select type (x)
633 type is (integer)
634 htbl_ptr_h%ptr = c_loc(x)
635 type is (integer(i8))
636 htbl_ptr_h%ptr = c_loc(x)
637 type is (real)
638 htbl_ptr_h%ptr = c_loc(x)
639 type is (double precision)
640 htbl_ptr_h%ptr = c_loc(x)
641 class default
642 call neko_error('Unknown Fortran type')
643 end select
644
645 htbl_ptr_d%ptr = x_d
646
647 call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
648
649 end subroutine device_associate_r2
650
652 subroutine device_associate_r3(x, x_d, n)
653 class(*), intent(inout), target :: x(:,:,:)
654 type(c_ptr), intent(inout) :: x_d
655 integer, intent(in), optional :: n
656 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
657 integer :: n_
658
659 if (present(n)) then
660 n_ = n
661 else
662 n_ = size(x)
663 end if
664
665 if (n_ .eq. 0) return
666 if (.not. c_associated(x_d)) call neko_error('Attempting to associate' // &
667 ' to a null device pointer for a non-empty array')
668 select type (x)
669 type is (integer)
670 htbl_ptr_h%ptr = c_loc(x)
671 type is (integer(i8))
672 htbl_ptr_h%ptr = c_loc(x)
673 type is (real)
674 htbl_ptr_h%ptr = c_loc(x)
675 type is (double precision)
676 htbl_ptr_h%ptr = c_loc(x)
677 class default
678 call neko_error('Unknown Fortran type')
679 end select
680
681 htbl_ptr_d%ptr = x_d
682
683 call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
684
685 end subroutine device_associate_r3
686
688 subroutine device_associate_r4(x, x_d, n)
689 class(*), intent(inout), target :: x(:,:,:,:)
690 type(c_ptr), intent(inout) :: x_d
691 integer, intent(in), optional :: n
692 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
693 integer :: n_
694
695 if (present(n)) then
696 n_ = n
697 else
698 n_ = size(x)
699 end if
700
701 if (n_ .eq. 0) return
702 if (.not. c_associated(x_d)) call neko_error('Attempting to associate' // &
703 ' to a null device pointer for a non-empty array')
704
705 select type (x)
706 type is (integer)
707 htbl_ptr_h%ptr = c_loc(x)
708 type is (integer(i8))
709 htbl_ptr_h%ptr = c_loc(x)
710 type is (real)
711 htbl_ptr_h%ptr = c_loc(x)
712 type is (double precision)
713 htbl_ptr_h%ptr = c_loc(x)
714 class default
715 call neko_error('Unknown Fortran type')
716 end select
717
718 htbl_ptr_d%ptr = x_d
719
720 call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
721
722 end subroutine device_associate_r4
723
726 class(*), intent(inout), target :: x(:)
727 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
728
729 select type (x)
730 type is (integer)
731 htbl_ptr_h%ptr = c_loc(x)
732 type is (integer(i8))
733 htbl_ptr_h%ptr = c_loc(x)
734 type is (real)
735 htbl_ptr_h%ptr = c_loc(x)
736 type is (double precision)
737 htbl_ptr_h%ptr = c_loc(x)
738 class default
739 call neko_error('Unknown Fortran type')
740 end select
741
742 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
743 call device_addrtbl%remove(htbl_ptr_h)
744 end if
745
746 end subroutine device_deassociate_r1
747
750 class(*), intent(inout), target :: x(:,:)
751 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
752
753 select type (x)
754 type is (integer)
755 htbl_ptr_h%ptr = c_loc(x)
756 type is (integer(i8))
757 htbl_ptr_h%ptr = c_loc(x)
758 type is (real)
759 htbl_ptr_h%ptr = c_loc(x)
760 type is (double precision)
761 htbl_ptr_h%ptr = c_loc(x)
762 class default
763 call neko_error('Unknown Fortran type')
764 end select
765
766 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
767 call device_addrtbl%remove(htbl_ptr_h)
768 end if
769
770 end subroutine device_deassociate_r2
771
774 class(*), intent(inout), target :: x(:,:,:)
775 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
776
777 select type (x)
778 type is (integer)
779 htbl_ptr_h%ptr = c_loc(x)
780 type is (integer(i8))
781 htbl_ptr_h%ptr = c_loc(x)
782 type is (real)
783 htbl_ptr_h%ptr = c_loc(x)
784 type is (double precision)
785 htbl_ptr_h%ptr = c_loc(x)
786 class default
787 call neko_error('Unknown Fortran type')
788 end select
789
790 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
791 call device_addrtbl%remove(htbl_ptr_h)
792 end if
793
794 end subroutine device_deassociate_r3
795
798 class(*), intent(inout), target :: x(:,:,:,:)
799 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
800
801 select type (x)
802 type is (integer)
803 htbl_ptr_h%ptr = c_loc(x)
804 type is (integer(i8))
805 htbl_ptr_h%ptr = c_loc(x)
806 type is (real)
807 htbl_ptr_h%ptr = c_loc(x)
808 type is (double precision)
809 htbl_ptr_h%ptr = c_loc(x)
810 class default
811 call neko_error('Unknown Fortran type')
812 end select
813
814 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
815 call device_addrtbl%remove(htbl_ptr_h)
816 end if
817
818 end subroutine device_deassociate_r4
819
821 subroutine device_map_r1(x, x_d, n)
822 integer, intent(in) :: n
823 class(*), intent(inout), target :: x(:)
824 type(c_ptr), intent(inout) :: x_d
825 integer(c_size_t) :: s
826
827 if (c_associated(x_d)) then
828 call neko_error('Device pointer already associated')
829 end if
830
831 select type (x)
832 type is (integer)
833 s = n * int(4, c_size_t)
834 type is (integer(i8))
835 s = n * int(8, c_size_t)
836 type is (real)
837 s = n * int(4, c_size_t)
838 type is (double precision)
839 s = n * int(8, c_size_t)
840 class default
841 call neko_error('Unknown Fortran type')
842 end select
843
844 call device_alloc(x_d, s)
845 call device_associate(x, x_d, n)
846
847 end subroutine device_map_r1
848
850 subroutine device_map_r2(x, x_d, n)
851 integer, intent(in) :: n
852 class(*), intent(inout), target :: x(:,:)
853 type(c_ptr), intent(inout) :: x_d
854 integer(c_size_t) :: s
855
856 if (c_associated(x_d)) then
857 call neko_error('Device pointer already associated')
858 end if
859
860 select type (x)
861 type is (integer)
862 s = n * int(4, c_size_t)
863 type is (integer(i8))
864 s = n * int(8, c_size_t)
865 type is (real)
866 s = n * int(4, c_size_t)
867 type is (double precision)
868 s = n * int(8, c_size_t)
869 class default
870 call neko_error('Unknown Fortran type')
871 end select
872
873 call device_alloc(x_d, s)
874 call device_associate(x, x_d, n)
875
876 end subroutine device_map_r2
877
879 subroutine device_map_r3(x, x_d, n)
880 integer, intent(in) :: n
881 class(*), intent(inout), target :: x(:,:,:)
882 type(c_ptr), intent(inout) :: x_d
883 integer(c_size_t) :: s
884
885 if (c_associated(x_d)) then
886 call neko_error('Device pointer already associated')
887 end if
888
889 select type (x)
890 type is (integer)
891 s = n * int(4, c_size_t)
892 type is (integer(i8))
893 s = n * int(8, c_size_t)
894 type is (real)
895 s = n * int(4, c_size_t)
896 type is (double precision)
897 s = n * int(8, c_size_t)
898 class default
899 call neko_error('Unknown Fortran type')
900 end select
901
902 call device_alloc(x_d, s)
903 call device_associate(x, x_d, n)
904
905 end subroutine device_map_r3
906
908 subroutine device_map_r4(x, x_d, n)
909 integer, intent(in) :: n
910 class(*), intent(inout), target :: x(:,:,:,:)
911 type(c_ptr), intent(inout) :: x_d
912 integer(c_size_t) :: s
913
914 if (c_associated(x_d)) then
915 call neko_error('Device pointer already associated')
916 end if
917
918 select type (x)
919 type is (integer)
920 s = n * int(4, c_size_t)
921 type is (integer(i8))
922 s = n * int(8, c_size_t)
923 type is (real)
924 s = n * int(4, c_size_t)
925 type is (double precision)
926 s = n * int(8, c_size_t)
927 class default
928 call neko_error('Unknown Fortran type')
929 end select
930
931 call device_alloc(x_d, s)
932 call device_associate(x, x_d, n)
933
934 end subroutine device_map_r4
935
937 subroutine device_unmap_r1(x, x_d)
938 class(*), intent(inout), target :: x(:)
939 type(c_ptr), intent(inout) :: x_d
940 type(c_ptr) :: dev
941 logical :: mapped
942
943 ! Whether dev has a non-null address, meaning that x is mapped.
944 mapped = device_associated(x)
945
946 ! Repeated calls to this routine do nothing
947 if ((.not. mapped) .and. (.not. c_associated(x_d))) then
948 return
949 end if
950
951 ! Device pointer associated with x, should be same as x_d if mapped
952 if (mapped) then
953 dev = device_get_ptr(x)
954 else
955 dev = c_null_ptr
956 end if
957
958 ! Error if:
959 ! 1) x is not mapped to a device pointer, but x_d is not null.
960 ! 2) x_d is not a valid pointer, but x is mapped to some pointer.
961 ! 3) x is mapped to a device pointer that is not x_d.
962 if ((.not. mapped) .or. (.not. c_associated(x_d)) .or. &
963 (.not. c_associated(dev, x_d))) then
964 call neko_error('Inconsistent host/device mapping state in ' // &
965 'device_unmap')
966 end if
967
968 call device_deassociate(x)
969 call device_free(x_d)
970
971 end subroutine device_unmap_r1
972
974 subroutine device_unmap_r2(x, x_d)
975 class(*), intent(inout), target :: x(:,:)
976 type(c_ptr), intent(inout) :: x_d
977 type(c_ptr) :: dev
978 logical :: mapped
979
980 ! Whether dev has a non-null address, meaning that x is mapped.
981 mapped = device_associated(x)
982
983 ! Repeated calls to this routine do nothing
984 if ((.not. mapped) .and. (.not. c_associated(x_d))) then
985 return
986 end if
987
988 ! Device pointer associated with x, should be same as x_d if mapped
989 if (mapped) then
990 dev = device_get_ptr(x)
991 else
992 dev = c_null_ptr
993 end if
994
995 ! Error if:
996 ! 1) x is not mapped to a device pointer, but x_d is not null.
997 ! 2) x_d is not a valid pointer, but x is mapped to some pointer.
998 ! 3) x is mapped to a device pointer that is not x_d.
999 if ((.not. mapped) .or. (.not. c_associated(x_d)) .or. &
1000 (.not. c_associated(dev, x_d))) then
1001 call neko_error('Inconsistent host/device mapping state in ' // &
1002 'device_unmap')
1003 end if
1004
1005 call device_deassociate(x)
1006 call device_free(x_d)
1007
1008 end subroutine device_unmap_r2
1009
1011 subroutine device_unmap_r3(x, x_d)
1012 class(*), intent(inout), target :: x(:,:,:)
1013 type(c_ptr), intent(inout) :: x_d
1014 type(c_ptr) :: dev
1015 logical :: mapped
1016
1017 ! Whether dev has a non-null address, meaning that x is mapped.
1018 mapped = device_associated(x)
1019
1020 ! Repeated calls to this routine do nothing
1021 if ((.not. mapped) .and. (.not. c_associated(x_d))) then
1022 return
1023 end if
1024
1025 ! Device pointer associated with x, should be same as x_d if mapped
1026 if (mapped) then
1027 dev = device_get_ptr(x)
1028 else
1029 dev = c_null_ptr
1030 end if
1031
1032 ! Error if:
1033 ! 1) x is not mapped to a device pointer, but x_d is not null.
1034 ! 2) x_d is not a valid pointer, but x is mapped to some pointer.
1035 ! 3) x is mapped to a device pointer that is not x_d.
1036 if ((.not. mapped) .or. (.not. c_associated(x_d)) .or. &
1037 (.not. c_associated(dev, x_d))) then
1038 call neko_error('Inconsistent host/device mapping state in ' // &
1039 'device_unmap')
1040 end if
1041
1042 call device_deassociate(x)
1043 call device_free(x_d)
1044
1045 end subroutine device_unmap_r3
1046
1048 subroutine device_unmap_r4(x, x_d)
1049 class(*), intent(inout), target :: x(:,:,:,:)
1050 type(c_ptr), intent(inout) :: x_d
1051 type(c_ptr) :: dev
1052 logical :: mapped
1053
1054 ! Whether dev has a non-null address, meaning that x is mapped.
1055 mapped = device_associated(x)
1056
1057 ! Repeated calls to this routine do nothing
1058 if ((.not. mapped) .and. (.not. c_associated(x_d))) then
1059 return
1060 end if
1061
1062 ! Device pointer associated with x, should be same as x_d if mapped
1063 if (mapped) then
1064 dev = device_get_ptr(x)
1065 else
1066 dev = c_null_ptr
1067 end if
1068
1069 ! Error if:
1070 ! 1) x is not mapped to a device pointer, but x_d is not null.
1071 ! 2) x_d is not a valid pointer, but x is mapped to some pointer.
1072 ! 3) x is mapped to a device pointer that is not x_d.
1073 if ((.not. mapped) .or. (.not. c_associated(x_d)) .or. &
1074 (.not. c_associated(dev, x_d))) then
1075 call neko_error('Inconsistent host/device mapping state in ' // &
1076 'device_unmap')
1077 end if
1078
1079 call device_deassociate(x)
1080 call device_free(x_d)
1081
1082 end subroutine device_unmap_r4
1083
1085 function device_associated_r1(x) result(assoc)
1086 class(*), intent(inout), target :: x(:)
1087 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1088 logical :: assoc
1089
1090 select type (x)
1091 type is (integer)
1092 htbl_ptr_h%ptr = c_loc(x)
1093 type is (integer(i8))
1094 htbl_ptr_h%ptr = c_loc(x)
1095 type is (real)
1096 htbl_ptr_h%ptr = c_loc(x)
1097 type is (double precision)
1098 htbl_ptr_h%ptr = c_loc(x)
1099 class default
1100 call neko_error('Unknown Fortran type')
1101 end select
1102
1103 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1104 assoc = .true.
1105 else
1106 assoc = .false.
1107 end if
1108
1109 end function device_associated_r1
1110
1112 function device_associated_r2(x) result(assoc)
1113 class(*), intent(inout), target :: x(:,:)
1114 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1115 logical :: assoc
1116
1117 select type (x)
1118 type is (integer)
1119 htbl_ptr_h%ptr = c_loc(x)
1120 type is (integer(i8))
1121 htbl_ptr_h%ptr = c_loc(x)
1122 type is (real)
1123 htbl_ptr_h%ptr = c_loc(x)
1124 type is (double precision)
1125 htbl_ptr_h%ptr = c_loc(x)
1126 class default
1127 call neko_error('Unknown Fortran type')
1128 end select
1129
1130 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1131 assoc = .true.
1132 else
1133 assoc = .false.
1134 end if
1135
1136 end function device_associated_r2
1137
1139 function device_associated_r3(x) result(assoc)
1140 class(*), intent(inout), target :: x(:,:,:)
1141 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1142 logical :: assoc
1143
1144 select type (x)
1145 type is (integer)
1146 htbl_ptr_h%ptr = c_loc(x)
1147 type is (integer(i8))
1148 htbl_ptr_h%ptr = c_loc(x)
1149 type is (real)
1150 htbl_ptr_h%ptr = c_loc(x)
1151 type is (double precision)
1152 htbl_ptr_h%ptr = c_loc(x)
1153 class default
1154 call neko_error('Unknown Fortran type')
1155 end select
1156
1157 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1158 assoc = .true.
1159 else
1160 assoc = .false.
1161 end if
1162
1163 end function device_associated_r3
1164
1166 function device_associated_r4(x) result(assoc)
1167 class(*), intent(inout), target :: x(:,:,:,:)
1168 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1169 logical :: assoc
1170
1171 select type (x)
1172 type is (integer)
1173 htbl_ptr_h%ptr = c_loc(x)
1174 type is (integer(i8))
1175 htbl_ptr_h%ptr = c_loc(x)
1176 type is (real)
1177 htbl_ptr_h%ptr = c_loc(x)
1178 type is (double precision)
1179 htbl_ptr_h%ptr = c_loc(x)
1180 class default
1181 call neko_error('Unknown Fortran type')
1182 end select
1183
1184 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1185 assoc = .true.
1186 else
1187 assoc = .false.
1188 end if
1189
1190 end function device_associated_r4
1191
1194 class(*), intent(in), target :: x(:)
1195 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1196 type(c_ptr) :: device_get_ptr_r1
1197
1198 device_get_ptr_r1 = c_null_ptr
1199
1200 select type (x)
1201 type is (integer)
1202 htbl_ptr_h%ptr = c_loc(x)
1203 type is (integer(i8))
1204 htbl_ptr_h%ptr = c_loc(x)
1205 type is (real)
1206 htbl_ptr_h%ptr = c_loc(x)
1207 type is (double precision)
1208 htbl_ptr_h%ptr = c_loc(x)
1209 class default
1210 call neko_error('Unknown Fortran type')
1211 end select
1212
1213 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1214 device_get_ptr_r1 = htbl_ptr_d%ptr
1215 else
1216 call neko_error('Array not associated with device')
1217 end if
1218 end function device_get_ptr_r1
1219
1222 class(*), intent(in), target :: x(:,:)
1223 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1224 type(c_ptr) :: device_get_ptr_r2
1225
1226 device_get_ptr_r2 = c_null_ptr
1227
1228 select type (x)
1229 type is (integer)
1230 htbl_ptr_h%ptr = c_loc(x)
1231 type is (integer(i8))
1232 htbl_ptr_h%ptr = c_loc(x)
1233 type is (real)
1234 htbl_ptr_h%ptr = c_loc(x)
1235 type is (double precision)
1236 htbl_ptr_h%ptr = c_loc(x)
1237 class default
1238 call neko_error('Unknown Fortran type')
1239 end select
1240
1241 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1242 device_get_ptr_r2 = htbl_ptr_d%ptr
1243 else
1244 call neko_error('Array not associated with device')
1245 end if
1246 end function device_get_ptr_r2
1247
1250 class(*), intent(in), target :: x(:,:,:)
1251 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1252 type(c_ptr) :: device_get_ptr_r3
1253
1254 device_get_ptr_r3 = c_null_ptr
1255
1256 select type (x)
1257 type is (integer)
1258 htbl_ptr_h%ptr = c_loc(x)
1259 type is (integer(i8))
1260 htbl_ptr_h%ptr = c_loc(x)
1261 type is (real)
1262 htbl_ptr_h%ptr = c_loc(x)
1263 type is (double precision)
1264 htbl_ptr_h%ptr = c_loc(x)
1265 class default
1266 call neko_error('Unknown Fortran type')
1267 end select
1268
1269 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1270 device_get_ptr_r3 = htbl_ptr_d%ptr
1271 else
1272 call neko_error('Array not associated with device')
1273 end if
1274 end function device_get_ptr_r3
1275
1278 class(*), intent(in), target :: x(:,:,:,:)
1279 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1280 type(c_ptr) :: device_get_ptr_r4
1281
1282 device_get_ptr_r4 = c_null_ptr
1283
1284 select type (x)
1285 type is (integer)
1286 htbl_ptr_h%ptr = c_loc(x)
1287 type is (integer(i8))
1288 htbl_ptr_h%ptr = c_loc(x)
1289 type is (real)
1290 htbl_ptr_h%ptr = c_loc(x)
1291 type is (double precision)
1292 htbl_ptr_h%ptr = c_loc(x)
1293 class default
1294 call neko_error('Unknown Fortran type')
1295 end select
1296
1297 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1298 device_get_ptr_r4 = htbl_ptr_d%ptr
1299 else
1300 call neko_error('Array not associated with device')
1301 end if
1302 end function device_get_ptr_r4
1303
1306#ifdef HAVE_HIP
1307 if (hipdevicesynchronize() .ne. hipsuccess) then
1308 call neko_error('Error during device sync')
1309 end if
1310#elif HAVE_CUDA
1311 if (cudadevicesynchronize() .ne. cudasuccess) then
1312 call neko_error('Error during device sync')
1313 end if
1314#elif HAVE_OPENCL
1315 if (clfinish(glb_cmd_queue) .ne. cl_success) then
1316 call neko_error('Error during device sync')
1317 end if
1318#endif
1319 end subroutine device_sync_device
1320
1322 subroutine device_sync_stream(stream)
1323 type(c_ptr), intent(in) :: stream
1324#ifdef HAVE_HIP
1325 if (hipstreamsynchronize(stream) .ne. hipsuccess) then
1326 call neko_error('Error during stream sync')
1327 end if
1328#elif HAVE_CUDA
1329 if (cudastreamsynchronize(stream) .ne. cudasuccess) then
1330 call neko_error('Error during stream sync')
1331 end if
1332#elif HAVE_OPENCL
1333 if (clfinish(stream) .ne. cl_success) then
1334 call neko_error('Error during stream sync')
1335 end if
1336#endif
1337 end subroutine device_sync_stream
1338
1340 subroutine device_stream_create(stream, flags)
1341 type(c_ptr), intent(inout) :: stream
1342 integer, optional :: flags
1343 integer :: ierr
1344#ifdef HAVE_HIP
1345 if (present(flags)) then
1346 if (hipstreamcreatewithflags(stream, flags) .ne. hipsuccess) then
1347 call neko_error('Error during stream create (w. flags)')
1348 end if
1349 else
1350 if (hipstreamcreate(stream) .ne. hipsuccess) then
1351 call neko_error('Error during stream create')
1352 end if
1353 end if
1354#elif HAVE_CUDA
1355 if (present(flags)) then
1356 if (cudastreamcreatewithflags(stream, flags) .ne. cudasuccess) then
1357 call neko_error('Error during stream create (w. flags)')
1358 end if
1359 else
1360 if (cudastreamcreate(stream) .ne. cudasuccess) then
1361 call neko_error('Error during stream create')
1362 end if
1363 end if
1364#elif HAVE_OPENCL
1365 stream = clcreatecommandqueue(glb_ctx, glb_device_id, 0_i8, ierr)
1366 if (ierr .ne. cl_success) then
1367 call neko_error('Error during stream create')
1368 end if
1369#endif
1370 end subroutine device_stream_create
1371
1373 subroutine device_stream_create_with_priority(stream, flags, prio)
1374 type(c_ptr), intent(inout) :: stream
1375 integer, intent(in) :: flags, prio
1376#ifdef HAVE_HIP
1377 if (hipstreamcreatewithpriority(stream, flags, prio) .ne. hipsuccess) then
1378 call neko_error('Error during stream create (w. priority)')
1379 end if
1380#elif HAVE_CUDA
1381 if (cudastreamcreatewithpriority(stream, flags, prio) .ne. cudasuccess) then
1382 call neko_error('Error during stream create (w. priority)')
1383 end if
1384#elif HAVE_OPENCL
1385 call neko_error('Not implemented yet')
1386#endif
1388
1390 subroutine device_stream_destroy(stream)
1391 type(c_ptr), intent(inout) :: stream
1392#ifdef HAVE_HIP
1393 if (hipstreamdestroy(stream) .ne. hipsuccess) then
1394 call neko_error('Error during stream destroy')
1395 end if
1396#elif HAVE_CUDA
1397 if (cudastreamdestroy(stream) .ne. cudasuccess) then
1398 call neko_error('Error during stream destroy')
1399 end if
1400#elif HAVE_OPENCL
1401 if (clreleasecommandqueue(stream) .ne. cl_success) then
1402 call neko_error('Error during stream destroy')
1403 end if
1404#endif
1405 end subroutine device_stream_destroy
1406
1408 subroutine device_stream_wait_event(stream, event, flags)
1409 type(c_ptr), intent(in) :: stream
1410 type(c_ptr), target, intent(in) :: event
1411 integer :: flags
1412#ifdef HAVE_HIP
1413 if (hipstreamwaitevent(stream, event, flags) .ne. hipsuccess) then
1414 call neko_error('Error during stream sync')
1415 end if
1416#elif HAVE_CUDA
1417 if (cudastreamwaitevent(stream, event, flags) .ne. cudasuccess) then
1418 call neko_error('Error during stream sync')
1419 end if
1420#elif HAVE_OPENCL
1421 if (clenqueuebarrier(stream) .ne. cl_success) then
1422 call neko_error('Error during barrier')
1423 end if
1424 if (clenqueuewaitforevents(stream, 1, c_loc(event)) .ne. cl_success) then
1425 call neko_error('Error during stream sync')
1426 end if
1427#endif
1428 end subroutine device_stream_wait_event
1429
1432#if HAVE_CUDA
1433 if (cudaprofilerstart() .ne. cudasuccess) then
1434 call neko_error('Error starting profiler')
1435 end if
1436#endif
1437 end subroutine device_profiler_start
1438
1441#if HAVE_CUDA
1442 if (cudaprofilerstop() .ne. cudasuccess) then
1443 call neko_error('Error stopping profiler')
1444 end if
1445#endif
1446 end subroutine device_profiler_stop
1447
1449 subroutine device_event_create(event, flags)
1450 type(c_ptr), intent(inout) :: event
1451 integer, optional :: flags
1452 integer :: ierr
1453#ifdef HAVE_HIP
1454 if (present(flags)) then
1455 if (hipeventcreatewithflags(event, flags) .ne. hipsuccess) then
1456 call neko_error('Error during event create (w. flags)')
1457 end if
1458 else
1459 if (hipeventcreate(event) .ne. hipsuccess) then
1460 call neko_error('Error during event create')
1461 end if
1462 end if
1463#elif HAVE_CUDA
1464 if (present(flags)) then
1465 if (cudaeventcreatewithflags(event, flags) .ne. cudasuccess) then
1466 call neko_error('Error during event create (w. flags)')
1467 end if
1468 else
1469 if (cudaeventcreate(event) .ne. cudasuccess) then
1470 call neko_error('Error during event create')
1471 end if
1472 end if
1473#elif HAVE_OPENCL
1474 event = c_null_ptr
1475#endif
1476 end subroutine device_event_create
1477
1479 subroutine device_event_destroy(event)
1480 type(c_ptr), intent(inout) :: event
1481#ifdef HAVE_HIP
1482 if (hipeventdestroy(event) .ne. hipsuccess) then
1483 call neko_error('Error during event destroy')
1484 end if
1485#elif HAVE_CUDA
1486 if (cudaeventdestroy(event) .ne. cudasuccess) then
1487 call neko_error('Error during event destroy')
1488 end if
1489#elif HAVE_OPENCL
1490 event = c_null_ptr
1491#endif
1492 end subroutine device_event_destroy
1493
1495 subroutine device_event_record(event, stream)
1496 type(c_ptr), target, intent(in) :: event
1497 type(c_ptr), intent(in) :: stream
1498#ifdef HAVE_HIP
1499 if (hipeventrecord(event, stream) .ne. hipsuccess) then
1500 call neko_error('Error recording an event')
1501 end if
1502#elif HAVE_CUDA
1503 if (cudaeventrecord(event, stream) .ne. cudasuccess) then
1504 call neko_error('Error recording an event')
1505 end if
1506#elif HAVE_OPENCL
1507 if (clenqueuemarker(stream, c_loc(event)) .ne. cl_success) then
1508 call neko_error('Error recording an event')
1509 end if
1510#endif
1511 end subroutine device_event_record
1512
1514 subroutine device_event_sync(event)
1515 type(c_ptr), target, intent(in) :: event
1516#ifdef HAVE_HIP
1517 if (hipeventsynchronize(event) .ne. hipsuccess) then
1518 call neko_error('Error during event sync')
1519 end if
1520#elif HAVE_CUDA
1521 if (cudaeventsynchronize(event) .ne. cudasuccess) then
1522 call neko_error('Error during event sync')
1523 end if
1524#elif HAVE_OPENCL
1525 if (c_associated(event)) then
1526 if (clwaitforevents(1, c_loc(event)) .ne. cl_success) then
1527 call neko_error('Error during event sync')
1528 end if
1529 end if
1530#endif
1531 end subroutine device_event_sync
1532
1533end module device
double real
Associate a Fortran array to a (allocated) device pointer.
Definition device.F90:89
Check if a Fortran array is assoicated with a device pointer.
Definition device.F90:95
Deassociate a Fortran array from a device pointer.
Definition device.F90:101
Return the device pointer for an associated Fortran array.
Definition device.F90:107
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Synchronize a device or stream.
Definition device.F90:113
Unmap a Fortran array from a device (deassociate and free)
Definition device.F90:83
Fortran CUDA interface.
Definition cuda_intf.F90:34
subroutine cuda_device_name(name)
subroutine cuda_finalize(glb_cmd_queue, aux_cmd_queue)
@ cudamemcpydevicetohost
Definition cuda_intf.F90:54
@ cudamemcpydevicetodevice
Definition cuda_intf.F90:55
@ cudamemcpyhosttodevice
Definition cuda_intf.F90:53
integer function cuda_device_count()
Return the number of avaialble CUDA devices.
subroutine cuda_init(glb_cmd_queue, aux_cmd_queue, strm_high_prio, strm_low_prio)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_record(event, stream)
Record a device event.
Definition device.F90:1496
subroutine device_associate_r4(x, x_d, n)
Associate a Fortran rank 4 array to a (allocated) device pointer.
Definition device.F90:689
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1515
subroutine device_associate_r1(x, x_d, n)
Associate a Fortran rank 1 array to a (allocated) device pointer.
Definition device.F90:579
subroutine, public device_finalize
Definition device.F90:156
integer, parameter, public device_to_device
Definition device.F90:47
type(c_ptr) function device_get_ptr_r4(x)
Return the device pointer for an associated Fortran rank 4 array.
Definition device.F90:1278
type(c_ptr) function device_get_ptr_r1(x)
Return the device pointer for an associated Fortran rank 1 array.
Definition device.F90:1194
integer, public strm_low_prio
Low priority stream setting.
Definition device.F90:68
integer, parameter, public host_to_device
Definition device.F90:47
subroutine device_map_r3(x, x_d, n)
Map a Fortran rank 3 array to a device (allocate and associate)
Definition device.F90:880
type(c_ptr), bind(C), public prf_cmd_queue
Profiling command queue.
Definition device.F90:58
subroutine, private device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
Copy data between host and device.
Definition device.F90:470
logical function device_associated_r3(x)
Check if a Fortran rank 3 array is assoicated with a device pointer.
Definition device.F90:1140
subroutine device_unmap_r4(x, x_d)
Unmap a Fortran rank 4 array from a device (deassociate and free)
Definition device.F90:1049
type(htable_cptr_t), private device_addrtbl
Table of host to device address mappings.
Definition device.F90:118
subroutine, public device_profiler_stop()
Stop device profiling.
Definition device.F90:1441
subroutine device_deassociate_r3(x)
Deassociate a Fortran rank 3 array from a device pointer.
Definition device.F90:774
subroutine, public device_sync_stream(stream)
Synchronize a device stream.
Definition device.F90:1323
type(c_ptr) function device_get_ptr_r3(x)
Return the device pointer for an associated Fortran rank 3 array.
Definition device.F90:1250
subroutine device_unmap_r2(x, x_d)
Unmap a Fortran rank 2 array from a device (deassociate and free)
Definition device.F90:975
subroutine, public device_profiler_start()
Start device profiling.
Definition device.F90:1432
subroutine device_map_r2(x, x_d, n)
Map a Fortran rank 2 array to a device (allocate and associate)
Definition device.F90:851
subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 2 arrays)
Definition device.F90:325
subroutine device_map_r4(x, x_d, n)
Map a Fortran rank 4 array to a device (allocate and associate)
Definition device.F90:909
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:225
integer, parameter, public device_to_host
Definition device.F90:47
subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
Copy data between host and device (or device and device) (c-pointers)
Definition device.F90:441
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1480
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:198
subroutine device_associate_r2(x, x_d, n)
Associate a Fortran rank 2 array to a (allocated) device pointer.
Definition device.F90:616
subroutine, public device_stream_create_with_priority(stream, flags, prio)
Create a device stream/command queue with priority.
Definition device.F90:1374
subroutine, public device_stream_create(stream, flags)
Create a device stream/command queue.
Definition device.F90:1341
subroutine device_deassociate_r4(x)
Deassociate a Fortran rank 4 array from a device pointer.
Definition device.F90:798
subroutine device_sync_device()
Synchronize the device.
Definition device.F90:1306
subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 4 arrays)
Definition device.F90:401
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
Definition device.F90:1409
subroutine device_map_r1(x, x_d, n)
Map a Fortran rank 1 array to a device (allocate and associate)
Definition device.F90:822
subroutine device_associate_r3(x, x_d, n)
Associate a Fortran rank 3 array to a (allocated) device pointer.
Definition device.F90:653
subroutine device_unmap_r1(x, x_d)
Unmap a Fortran rank 1 array from a device (deassociate and free)
Definition device.F90:938
subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 1 arrays)
Definition device.F90:287
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Definition device.F90:51
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1450
integer function, public device_count()
Return the number of available devices.
Definition device.F90:185
subroutine, public device_name(name)
Definition device.F90:172
logical function device_associated_r4(x)
Check if a Fortran rank 4 array is assoicated with a device pointer.
Definition device.F90:1167
logical function device_associated_r2(x)
Check if a Fortran rank 2 array is assoicated with a device pointer.
Definition device.F90:1113
integer, public strm_high_prio
High priority stream setting.
Definition device.F90:65
type(c_ptr), bind(C), public aux_cmd_queue
Aux command queue.
Definition device.F90:54
type(c_ptr) function device_get_ptr_r2(x)
Return the device pointer for an associated Fortran rank 2 array.
Definition device.F90:1222
subroutine device_unmap_r3(x, x_d)
Unmap a Fortran rank 3 array from a device (deassociate and free)
Definition device.F90:1012
subroutine device_deassociate_r1(x)
Deassociate a Fortran rank 1 array from a device pointer.
Definition device.F90:726
type(c_ptr), bind(C), public glb_cmd_event
Event for the global command queue.
Definition device.F90:62
subroutine device_deassociate_r2(x)
Deassociate a Fortran rank 2 array from a device pointer.
Definition device.F90:750
subroutine, public device_init
Definition device.F90:134
logical function device_associated_r1(x)
Check if a Fortran rank 1 array is assoicated with a device pointer.
Definition device.F90:1086
subroutine, public device_memset(x_d, v, s, sync, strm)
Set memory on the device to a value.
Definition device.F90:244
subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 3 arrays)
Definition device.F90:363
subroutine, public device_stream_destroy(stream)
Destroy a device stream/command queue.
Definition device.F90:1391
Fortran HIP interface.
Definition hip_intf.F90:34
subroutine hip_device_name(name)
Definition hip_intf.F90:271
@ hipmemcpydevicetohost
Definition hip_intf.F90:72
@ hipmemcpydevicetodevice
Definition hip_intf.F90:73
@ hipmemcpyhosttodevice
Definition hip_intf.F90:71
subroutine hip_init(glb_cmd_queue, aux_cmd_queue, strm_high_prio, strm_low_prio)
Definition hip_intf.F90:236
subroutine hip_finalize(glb_cmd_queue, aux_cmd_queue)
Definition hip_intf.F90:258
integer function hip_device_count()
Return the number of available HIP devices.
Definition hip_intf.F90:288
Implements a hash table ADT.
Definition htable.f90:52
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public i8
Definition num_types.f90:7
Fortran OpenCL interface.
subroutine opencl_device_name(name)
subroutine opencl_finalize(glb_cmd_queue, aux_cmd_queue, prf_cmd_queue)
integer function opencl_device_count()
Return the number of OpenCL devices.
subroutine opencl_init(glb_cmd_queue, aux_cmd_queue, prf_cmd_queue)
OpenCL JIT program library.
Definition prgm_lib.F90:2
subroutine, public opencl_prgm_lib_release
Definition prgm_lib.F90:128
Utilities.
Definition utils.f90:35
C pointer based hash table.
Definition htable.f90:162