Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
device.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2025, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module device
35 use num_types, only : i8
36 use opencl_intf
37 use cuda_intf
38 use hip_intf
40 use htable, only : htable_cptr_t, h_cptr_t
41 use utils, only : neko_error
43 use, intrinsic :: iso_c_binding
44 implicit none
45 private
46
47 integer, public, parameter :: host_to_device = 1, device_to_host = 2, &
49
51 type(c_ptr), public, bind(c) :: glb_cmd_queue = c_null_ptr
52
54 type(c_ptr), public, bind(c) :: aux_cmd_queue = c_null_ptr
55
56#ifdef HAVE_OPENCL
57
58 type(c_ptr), public, bind(c) :: prf_cmd_queue = c_null_ptr
59#endif
60
62 type(c_ptr), public, bind(c) :: glb_cmd_event
63
65 integer, public :: strm_high_prio
66
68 integer, public :: strm_low_prio
69
74 end interface device_memcpy
75
77 interface device_map
78 module procedure device_map_r1, device_map_r2, &
80 end interface device_map
81
86 end interface device_unmap
87
92 end interface device_associate
93
98 end interface device_associated
99
104 end interface device_deassociate
105
110 end interface device_get_ptr
111
113 interface device_sync
114 module procedure device_sync_device, device_sync_stream
115 end interface device_sync
116
119
128
129 private :: device_memcpy_common
130
131contains
132
133 subroutine device_init
134#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
135 call device_addrtbl%init(64)
136
137#ifdef HAVE_HIP
139#elif HAVE_CUDA
141#elif HAVE_OPENCL
143#endif
145#endif
146
147 ! Check the device count against the number of MPI ranks
148 if (neko_bcknd_device .eq. 1) then
149 if (device_count() .ne. 1) then
150 call neko_error('Only one device is supported per MPI rank')
151 end if
152 end if
153 end subroutine device_init
154
156#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
157 call device_addrtbl%free()
158
159#ifdef HAVE_HIP
161#elif HAVE_CUDA
163#elif HAVE_OPENCL
166#endif
168#endif
169 end subroutine device_finalize
170
171 subroutine device_name(name)
172 character(len=*), intent(inout) :: name
173
174#ifdef HAVE_HIP
175 call hip_device_name(name)
176#elif HAVE_CUDA
177 call cuda_device_name(name)
178#elif HAVE_OPENCL
179 call opencl_device_name(name)
180#endif
181 end subroutine device_name
182
184 integer function device_count()
185#ifdef HAVE_HIP
187#elif HAVE_CUDA
189#elif HAVE_OPENCL
191#else
192 device_count = 0
193#endif
194 end function device_count
195
197 subroutine device_alloc(x_d, s)
198 type(c_ptr), intent(inout) :: x_d
199 integer(c_size_t) :: s
200 integer :: ierr
201
202 if (s .eq. 0) then
203 call device_sync()
204 x_d = c_null_ptr
205 return
206 end if
207#ifdef HAVE_HIP
208 if (hipmalloc(x_d, s) .ne. hipsuccess) then
209 call neko_error('Memory allocation on device failed')
210 end if
211#elif HAVE_CUDA
212 if (cudamalloc(x_d, s) .ne. cudasuccess) then
213 call neko_error('Memory allocation on device failed')
214 end if
215#elif HAVE_OPENCL
216 x_d = clcreatebuffer(glb_ctx, cl_mem_read_write, s, c_null_ptr, ierr)
217 if (ierr .ne. cl_success) then
218 call neko_error('Memory allocation on device failed')
219 end if
220#endif
221 end subroutine device_alloc
222
224 subroutine device_free(x_d)
225 type(c_ptr), intent(inout) :: x_d
226#ifdef HAVE_HIP
227 if (hipfree(x_d) .ne. hipsuccess) then
228 call neko_error('Memory deallocation on device failed')
229 end if
230#elif HAVE_CUDA
231 if (cudafree(x_d) .ne. cudasuccess) then
232 call neko_error('Memory deallocation on device failed')
233 end if
234#elif HAVE_OPENCL
235 if (clreleasememobject(x_d) .ne. cl_success) then
236 call neko_error('Memory deallocation on device failed')
237 end if
238#endif
239 x_d = c_null_ptr
240 end subroutine device_free
241
243 subroutine device_memset(x_d, v, s, sync, strm)
244 type(c_ptr), intent(inout) :: x_d
245 integer(c_int), target, value :: v
246 integer(c_size_t), intent(in) :: s
247 logical, optional :: sync
248 type(c_ptr), optional :: strm
249 type(c_ptr) :: stream
250 logical :: sync_device
251
252 if (present(sync)) then
253 sync_device = sync
254 else
255 sync_device = .false.
256 end if
257
258 if (present(strm)) then
259 stream = strm
260 else
261 stream = glb_cmd_queue
262 end if
263
264#ifdef HAVE_HIP
265 if (hipmemsetasync(x_d, v, s, stream) .ne. hipsuccess) then
266 call neko_error('Device memset async failed')
267 end if
268#elif HAVE_CUDA
269 if (cudamemsetasync(x_d, v, s, stream) .ne. cudasuccess) then
270 call neko_error('Device memset async failed')
271 end if
272#elif HAVE_OPENCL
273 if (clenqueuefillbuffer(stream, x_d, c_loc(v), c_sizeof(v), 0_i8, &
274 s, 0, c_null_ptr, c_null_ptr) .ne. cl_success) then
275 call neko_error('Device memset async failed')
276 end if
277#endif
278
279 if (sync_device) then
280 call device_sync_stream(stream)
281 end if
282
283 end subroutine device_memset
284
286 subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
287 integer, intent(in) :: n
288 class(*), intent(inout), target :: x(:)
289 type(c_ptr), intent(inout) :: x_d
290 integer, intent(in), value :: dir
291 logical :: sync
292 type(c_ptr), optional :: strm
293 type(c_ptr) :: ptr_h, copy_stream
294 integer(c_size_t) :: s
295
296 if (present(strm)) then
297 copy_stream = strm
298 else
299 copy_stream = glb_cmd_queue
300 end if
301
302 select type (x)
303 type is (integer)
304 s = n * int(4, c_size_t)
305 ptr_h = c_loc(x)
306 type is (integer(i8))
307 s = n * int(8, c_size_t)
308 ptr_h = c_loc(x)
309 type is (real)
310 s = n * int(4, c_size_t)
311 ptr_h = c_loc(x)
312 type is (double precision)
313 s = n * int(8, c_size_t)
314 ptr_h = c_loc(x)
315 class default
316 call neko_error('Unknown Fortran type')
317 end select
318
319 call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
320
321 end subroutine device_memcpy_r1
322
324 subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
325 integer, intent(in) :: n
326 class(*), intent(inout), target :: x(:,:)
327 type(c_ptr), intent(inout) :: x_d
328 integer, intent(in), value :: dir
329 logical :: sync
330 type(c_ptr), optional :: strm
331 type(c_ptr) :: ptr_h, copy_stream
332 integer(c_size_t) :: s
333
334 if (present(strm)) then
335 copy_stream = strm
336 else
337 copy_stream = glb_cmd_queue
338 end if
339
340 select type (x)
341 type is (integer)
342 s = n * int(4, c_size_t)
343 ptr_h = c_loc(x)
344 type is (integer(i8))
345 s = n * int(8, c_size_t)
346 ptr_h = c_loc(x)
347 type is (real)
348 s = n * int(4, c_size_t)
349 ptr_h = c_loc(x)
350 type is (double precision)
351 s = n * int(8, c_size_t)
352 ptr_h = c_loc(x)
353 class default
354 call neko_error('Unknown Fortran type')
355 end select
356
357 call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
358
359 end subroutine device_memcpy_r2
360
362 subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
363 integer, intent(in) :: n
364 class(*), intent(inout), target :: x(:,:,:)
365 type(c_ptr), intent(inout) :: x_d
366 integer, intent(in), value :: dir
367 logical :: sync
368 type(c_ptr), optional :: strm
369 type(c_ptr) :: ptr_h, copy_stream
370 integer(c_size_t) :: s
371
372 if (present(strm)) then
373 copy_stream = strm
374 else
375 copy_stream = glb_cmd_queue
376 end if
377
378 select type (x)
379 type is (integer)
380 s = n * int(4, c_size_t)
381 ptr_h = c_loc(x)
382 type is (integer(i8))
383 s = n * int(8, c_size_t)
384 ptr_h = c_loc(x)
385 type is (real)
386 s = n * int(4, c_size_t)
387 ptr_h = c_loc(x)
388 type is (double precision)
389 s = n * int(8, c_size_t)
390 ptr_h = c_loc(x)
391 class default
392 call neko_error('Unknown Fortran type')
393 end select
394
395 call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
396
397 end subroutine device_memcpy_r3
398
400 subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
401 integer, intent(in) :: n
402 class(*), intent(inout), target :: x(:,:,:,:)
403 type(c_ptr), intent(inout) :: x_d
404 integer, intent(in), value :: dir
405 logical :: sync
406 type(c_ptr), optional :: strm
407 type(c_ptr) :: ptr_h, copy_stream
408 integer(c_size_t) :: s
409
410 if (present(strm)) then
411 copy_stream = strm
412 else
413 copy_stream = glb_cmd_queue
414 end if
415
416 select type (x)
417 type is (integer)
418 s = n * int(4, c_size_t)
419 ptr_h = c_loc(x)
420 type is (integer(i8))
421 s = n * int(8, c_size_t)
422 ptr_h = c_loc(x)
423 type is (real)
424 s = n * int(4, c_size_t)
425 ptr_h = c_loc(x)
426 type is (double precision)
427 s = n * int(8, c_size_t)
428 ptr_h = c_loc(x)
429 class default
430 call neko_error('Unknown Fortran type')
431 end select
432
433 call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
434
435 end subroutine device_memcpy_r4
436
440 subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
441 type(c_ptr), intent(inout) :: dst
442 type(c_ptr), intent(inout) :: src
443 integer(c_size_t), intent(in) :: s
444 integer, intent(in), value :: dir
445 logical, optional :: sync
446 type(c_ptr), optional :: strm
447 type(c_ptr) :: copy_stream
448 logical :: sync_device
449
450 if (present(sync)) then
451 sync_device = sync
452 else
453 sync_device = .false.
454 end if
455
456 if (present(strm)) then
457 copy_stream = strm
458 else
459 copy_stream = glb_cmd_queue
460 end if
461
462 call device_memcpy_common(dst, src, s, dir, sync_device, copy_stream)
463
464 end subroutine device_memcpy_cptr
465
469 subroutine device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
470 type(c_ptr), intent(inout) :: ptr_h
471 type(c_ptr), intent(inout) :: x_d
472 integer(c_size_t), intent(in) :: s
473 integer, intent(in), value :: dir
474 logical, intent(in) :: sync_device
475 type(c_ptr), intent(inout) :: stream
476
477 if (s .eq. 0) then
478 if (sync_device) then
479 call device_sync_stream(stream)
480 end if
481 return
482 end if
483
484#ifdef HAVE_HIP
485 if (dir .eq. host_to_device) then
486 if (hipmemcpyasync(x_d, ptr_h, s, &
487 hipmemcpyhosttodevice, stream) .ne. hipsuccess) then
488 call neko_error('Device memcpy async (host-to-device) failed')
489 end if
490 else if (dir .eq. device_to_host) then
491 if (hipmemcpyasync(ptr_h, x_d, s, &
492 hipmemcpydevicetohost, stream) .ne. hipsuccess) then
493 call neko_error('Device memcpy async (device-to-host) failed')
494 end if
495 else if (dir .eq. device_to_device) then
496 if (hipmemcpyasync(ptr_h, x_d, s, hipmemcpydevicetodevice, stream) &
497 .ne. hipsuccess) then
498 call neko_error('Device memcpy async (device-to-device) failed')
499 end if
500 else
501 call neko_error('Device memcpy failed (invalid direction')
502 end if
503 if (sync_device) then
504 call device_sync_stream(stream)
505 end if
506#elif HAVE_CUDA
507 if (dir .eq. host_to_device) then
508 if (cudamemcpyasync(x_d, ptr_h, s, cudamemcpyhosttodevice, stream) &
509 .ne. cudasuccess) then
510 call neko_error('Device memcpy async (host-to-device) failed')
511 end if
512 else if (dir .eq. device_to_host) then
513 if (cudamemcpyasync(ptr_h, x_d, s, cudamemcpydevicetohost, stream) &
514 .ne. cudasuccess) then
515 call neko_error('Device memcpy async (device-to-host) failed')
516 end if
517 else if (dir .eq. device_to_device) then
518 if (cudamemcpyasync(ptr_h, x_d, s, cudamemcpydevicetodevice, stream) &
519 .ne. cudasuccess) then
520 call neko_error('Device memcpy async (device-to-device) failed')
521 end if
522 else
523 call neko_error('Device memcpy failed (invalid direction')
524 end if
525 if (sync_device) then
526 call device_sync_stream(stream)
527 end if
528#elif HAVE_OPENCL
529 if (sync_device) then
530 if (dir .eq. host_to_device) then
531 if (clenqueuewritebuffer(stream, x_d, cl_true, 0_i8, s, &
532 ptr_h, 0, c_null_ptr, c_null_ptr) &
533 .ne. cl_success) then
534 call neko_error('Device memcpy (host-to-device) failed')
535 end if
536 else if (dir .eq. device_to_host) then
537 if (clenqueuereadbuffer(stream, x_d, cl_true, 0_i8, s, ptr_h, &
538 0, c_null_ptr, c_null_ptr) &
539 .ne. cl_success) then
540 call neko_error('Device memcpy (device-to-host) failed')
541 end if
542 else if (dir .eq. device_to_device) then
543 if (clenqueuecopybuffer(stream, x_d, ptr_h, 0_i8, 0_i8, s, &
544 0, c_null_ptr, c_null_ptr) &
545 .ne. cl_success) then
546 call neko_error('Device memcpy (device-to-device) failed')
547 end if
548 else
549 call neko_error('Device memcpy failed (invalid direction')
550 end if
551 else
552 if (dir .eq. host_to_device) then
553 if (clenqueuewritebuffer(stream, x_d, cl_false, 0_i8, s, &
554 ptr_h, 0, c_null_ptr, c_null_ptr) &
555 .ne. cl_success) then
556 call neko_error('Device memcpy (host-to-device) failed')
557 end if
558 else if (dir .eq. device_to_host) then
559 if (clenqueuereadbuffer(stream, x_d, cl_false, 0_i8, s, ptr_h,&
560 0, c_null_ptr, c_null_ptr) &
561 .ne. cl_success) then
562 call neko_error('Device memcpy (device-to-host) failed')
563 end if
564 else if (dir .eq. device_to_device) then
565 if (clenqueuecopybuffer(stream, x_d, ptr_h, 0_i8, 0_i8, s, &
566 0, c_null_ptr, c_null_ptr) &
567 .ne. cl_success) then
568 call neko_error('Device memcpy (device-to-device) failed')
569 end if
570 else
571 call neko_error('Device memcpy failed (invalid direction')
572 end if
573 end if
574#endif
575 end subroutine device_memcpy_common
576
578 subroutine device_associate_r1(x, x_d)
579 class(*), intent(inout), target :: x(:)
580 type(c_ptr), intent(inout) :: x_d
581 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
582
583 select type (x)
584 type is (integer)
585 htbl_ptr_h%ptr = c_loc(x)
586 type is (integer(i8))
587 htbl_ptr_h%ptr = c_loc(x)
588 type is (real)
589 htbl_ptr_h%ptr = c_loc(x)
590 type is (double precision)
591 htbl_ptr_h%ptr = c_loc(x)
592 class default
593 call neko_error('Unknown Fortran type')
594 end select
595
596 htbl_ptr_d%ptr = x_d
597
598 call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
599
600 end subroutine device_associate_r1
601
603 subroutine device_associate_r2(x, x_d)
604 class(*), intent(inout), target :: x(:,:)
605 type(c_ptr), intent(inout) :: x_d
606 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
607
608 select type (x)
609 type is (integer)
610 htbl_ptr_h%ptr = c_loc(x)
611 type is (integer(i8))
612 htbl_ptr_h%ptr = c_loc(x)
613 type is (real)
614 htbl_ptr_h%ptr = c_loc(x)
615 type is (double precision)
616 htbl_ptr_h%ptr = c_loc(x)
617 class default
618 call neko_error('Unknown Fortran type')
619 end select
620
621 htbl_ptr_d%ptr = x_d
622
623 call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
624
625 end subroutine device_associate_r2
626
628 subroutine device_associate_r3(x, x_d)
629 class(*), intent(inout), target :: x(:,:,:)
630 type(c_ptr), intent(inout) :: x_d
631 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
632
633 select type (x)
634 type is (integer)
635 htbl_ptr_h%ptr = c_loc(x)
636 type is (integer(i8))
637 htbl_ptr_h%ptr = c_loc(x)
638 type is (real)
639 htbl_ptr_h%ptr = c_loc(x)
640 type is (double precision)
641 htbl_ptr_h%ptr = c_loc(x)
642 class default
643 call neko_error('Unknown Fortran type')
644 end select
645
646 htbl_ptr_d%ptr = x_d
647
648 call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
649
650 end subroutine device_associate_r3
651
653 subroutine device_associate_r4(x, x_d)
654 class(*), intent(inout), target :: x(:,:,:,:)
655 type(c_ptr), intent(inout) :: x_d
656 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
657
658 select type (x)
659 type is (integer)
660 htbl_ptr_h%ptr = c_loc(x)
661 type is (integer(i8))
662 htbl_ptr_h%ptr = c_loc(x)
663 type is (real)
664 htbl_ptr_h%ptr = c_loc(x)
665 type is (double precision)
666 htbl_ptr_h%ptr = c_loc(x)
667 class default
668 call neko_error('Unknown Fortran type')
669 end select
670
671 htbl_ptr_d%ptr = x_d
672
673 call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
674
675 end subroutine device_associate_r4
676
679 class(*), intent(inout), target :: x(:)
680 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
681
682 select type (x)
683 type is (integer)
684 htbl_ptr_h%ptr = c_loc(x)
685 type is (integer(i8))
686 htbl_ptr_h%ptr = c_loc(x)
687 type is (real)
688 htbl_ptr_h%ptr = c_loc(x)
689 type is (double precision)
690 htbl_ptr_h%ptr = c_loc(x)
691 class default
692 call neko_error('Unknown Fortran type')
693 end select
694
695 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
696 call device_addrtbl%remove(htbl_ptr_h)
697 end if
698
699 end subroutine device_deassociate_r1
700
703 class(*), intent(inout), target :: x(:,:)
704 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
705
706 select type (x)
707 type is (integer)
708 htbl_ptr_h%ptr = c_loc(x)
709 type is (integer(i8))
710 htbl_ptr_h%ptr = c_loc(x)
711 type is (real)
712 htbl_ptr_h%ptr = c_loc(x)
713 type is (double precision)
714 htbl_ptr_h%ptr = c_loc(x)
715 class default
716 call neko_error('Unknown Fortran type')
717 end select
718
719 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
720 call device_addrtbl%remove(htbl_ptr_h)
721 end if
722
723 end subroutine device_deassociate_r2
724
727 class(*), intent(inout), target :: x(:,:,:)
728 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
729
730 select type (x)
731 type is (integer)
732 htbl_ptr_h%ptr = c_loc(x)
733 type is (integer(i8))
734 htbl_ptr_h%ptr = c_loc(x)
735 type is (real)
736 htbl_ptr_h%ptr = c_loc(x)
737 type is (double precision)
738 htbl_ptr_h%ptr = c_loc(x)
739 class default
740 call neko_error('Unknown Fortran type')
741 end select
742
743 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
744 call device_addrtbl%remove(htbl_ptr_h)
745 end if
746
747 end subroutine device_deassociate_r3
748
751 class(*), intent(inout), target :: x(:,:,:,:)
752 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
753
754 select type (x)
755 type is (integer)
756 htbl_ptr_h%ptr = c_loc(x)
757 type is (integer(i8))
758 htbl_ptr_h%ptr = c_loc(x)
759 type is (real)
760 htbl_ptr_h%ptr = c_loc(x)
761 type is (double precision)
762 htbl_ptr_h%ptr = c_loc(x)
763 class default
764 call neko_error('Unknown Fortran type')
765 end select
766
767 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
768 call device_addrtbl%remove(htbl_ptr_h)
769 end if
770
771 end subroutine device_deassociate_r4
772
774 subroutine device_map_r1(x, x_d, n)
775 integer, intent(in) :: n
776 class(*), intent(inout), target :: x(:)
777 type(c_ptr), intent(inout) :: x_d
778 integer(c_size_t) :: s
779
780 if (c_associated(x_d)) then
781 call neko_error('Device pointer already associated')
782 end if
783
784 select type (x)
785 type is (integer)
786 s = n * int(4, c_size_t)
787 type is (integer(i8))
788 s = n * int(8, c_size_t)
789 type is (real)
790 s = n * int(4, c_size_t)
791 type is (double precision)
792 s = n * int(8, c_size_t)
793 class default
794 call neko_error('Unknown Fortran type')
795 end select
796
797 call device_alloc(x_d, s)
798 call device_associate(x, x_d)
799
800 end subroutine device_map_r1
801
803 subroutine device_map_r2(x, x_d, n)
804 integer, intent(in) :: n
805 class(*), intent(inout), target :: x(:,:)
806 type(c_ptr), intent(inout) :: x_d
807 integer(c_size_t) :: s
808
809 if (c_associated(x_d)) then
810 call neko_error('Device pointer already associated')
811 end if
812
813 select type (x)
814 type is (integer)
815 s = n * int(4, c_size_t)
816 type is (integer(i8))
817 s = n * int(8, c_size_t)
818 type is (real)
819 s = n * int(4, c_size_t)
820 type is (double precision)
821 s = n * int(8, c_size_t)
822 class default
823 call neko_error('Unknown Fortran type')
824 end select
825
826 call device_alloc(x_d, s)
827 call device_associate(x, x_d)
828
829 end subroutine device_map_r2
830
832 subroutine device_map_r3(x, x_d, n)
833 integer, intent(in) :: n
834 class(*), intent(inout), target :: x(:,:,:)
835 type(c_ptr), intent(inout) :: x_d
836 integer(c_size_t) :: s
837
838 if (c_associated(x_d)) then
839 call neko_error('Device pointer already associated')
840 end if
841
842 select type (x)
843 type is (integer)
844 s = n * int(4, c_size_t)
845 type is (integer(i8))
846 s = n * int(8, c_size_t)
847 type is (real)
848 s = n * int(4, c_size_t)
849 type is (double precision)
850 s = n * int(8, c_size_t)
851 class default
852 call neko_error('Unknown Fortran type')
853 end select
854
855 call device_alloc(x_d, s)
856 call device_associate(x, x_d)
857
858 end subroutine device_map_r3
859
861 subroutine device_map_r4(x, x_d, n)
862 integer, intent(in) :: n
863 class(*), intent(inout), target :: x(:,:,:,:)
864 type(c_ptr), intent(inout) :: x_d
865 integer(c_size_t) :: s
866
867 if (c_associated(x_d)) then
868 call neko_error('Device pointer already associated')
869 end if
870
871 select type (x)
872 type is (integer)
873 s = n * int(4, c_size_t)
874 type is (integer(i8))
875 s = n * int(8, c_size_t)
876 type is (real)
877 s = n * int(4, c_size_t)
878 type is (double precision)
879 s = n * int(8, c_size_t)
880 class default
881 call neko_error('Unknown Fortran type')
882 end select
883
884 call device_alloc(x_d, s)
885 call device_associate(x, x_d)
886
887 end subroutine device_map_r4
888
890 subroutine device_unmap_r1(x, x_d)
891 class(*), intent(inout), target :: x(:)
892 type(c_ptr), intent(inout) :: x_d
893 type(c_ptr) :: dev
894 logical :: mapped
895
896 ! Whether dev has a non-null address, meaning that x is mapped.
897 mapped = device_associated(x)
898
899 ! Repeated calls to this routine do nothing
900 if ((.not. mapped) .and. (.not. c_associated(x_d))) then
901 return
902 end if
903
904 ! Device pointer associated with x, should be same as x_d if mapped
905 if (mapped) then
906 dev = device_get_ptr(x)
907 else
908 dev = c_null_ptr
909 end if
910
911 ! Error if:
912 ! 1) x is not mapped to a device pointer, but x_d is not null.
913 ! 2) x_d is not a valid pointer, but x is mapped to some pointer.
914 ! 3) x is mapped to a device pointer that is not x_d.
915 if ((.not. mapped) .or. (.not. c_associated(x_d)) .or. &
916 (.not. c_associated(dev, x_d))) then
917 call neko_error('Inconsistent host/device mapping state in ' // &
918 'device_unmap')
919 end if
920
921 call device_deassociate(x)
922 call device_free(x_d)
923
924 end subroutine device_unmap_r1
925
927 subroutine device_unmap_r2(x, x_d)
928 class(*), intent(inout), target :: x(:,:)
929 type(c_ptr), intent(inout) :: x_d
930 type(c_ptr) :: dev
931 logical :: mapped
932
933 ! Whether dev has a non-null address, meaning that x is mapped.
934 mapped = device_associated(x)
935
936 ! Repeated calls to this routine do nothing
937 if ((.not. mapped) .and. (.not. c_associated(x_d))) then
938 return
939 end if
940
941 ! Device pointer associated with x, should be same as x_d if mapped
942 if (mapped) then
943 dev = device_get_ptr(x)
944 else
945 dev = c_null_ptr
946 end if
947
948 ! Error if:
949 ! 1) x is not mapped to a device pointer, but x_d is not null.
950 ! 2) x_d is not a valid pointer, but x is mapped to some pointer.
951 ! 3) x is mapped to a device pointer that is not x_d.
952 if ((.not. mapped) .or. (.not. c_associated(x_d)) .or. &
953 (.not. c_associated(dev, x_d))) then
954 call neko_error('Inconsistent host/device mapping state in ' // &
955 'device_unmap')
956 end if
957
958 call device_deassociate(x)
959 call device_free(x_d)
960
961 end subroutine device_unmap_r2
962
964 subroutine device_unmap_r3(x, x_d)
965 class(*), intent(inout), target :: x(:,:,:)
966 type(c_ptr), intent(inout) :: x_d
967 type(c_ptr) :: dev
968 logical :: mapped
969
970 ! Whether dev has a non-null address, meaning that x is mapped.
971 mapped = device_associated(x)
972
973 ! Repeated calls to this routine do nothing
974 if ((.not. mapped) .and. (.not. c_associated(x_d))) then
975 return
976 end if
977
978 ! Device pointer associated with x, should be same as x_d if mapped
979 if (mapped) then
980 dev = device_get_ptr(x)
981 else
982 dev = c_null_ptr
983 end if
984
985 ! Error if:
986 ! 1) x is not mapped to a device pointer, but x_d is not null.
987 ! 2) x_d is not a valid pointer, but x is mapped to some pointer.
988 ! 3) x is mapped to a device pointer that is not x_d.
989 if ((.not. mapped) .or. (.not. c_associated(x_d)) .or. &
990 (.not. c_associated(dev, x_d))) then
991 call neko_error('Inconsistent host/device mapping state in ' // &
992 'device_unmap')
993 end if
994
995 call device_deassociate(x)
996 call device_free(x_d)
997
998 end subroutine device_unmap_r3
999
1001 subroutine device_unmap_r4(x, x_d)
1002 class(*), intent(inout), target :: x(:,:,:,:)
1003 type(c_ptr), intent(inout) :: x_d
1004 type(c_ptr) :: dev
1005 logical :: mapped
1006
1007 ! Whether dev has a non-null address, meaning that x is mapped.
1008 mapped = device_associated(x)
1009
1010 ! Repeated calls to this routine do nothing
1011 if ((.not. mapped) .and. (.not. c_associated(x_d))) then
1012 return
1013 end if
1014
1015 ! Device pointer associated with x, should be same as x_d if mapped
1016 if (mapped) then
1017 dev = device_get_ptr(x)
1018 else
1019 dev = c_null_ptr
1020 end if
1021
1022 ! Error if:
1023 ! 1) x is not mapped to a device pointer, but x_d is not null.
1024 ! 2) x_d is not a valid pointer, but x is mapped to some pointer.
1025 ! 3) x is mapped to a device pointer that is not x_d.
1026 if ((.not. mapped) .or. (.not. c_associated(x_d)) .or. &
1027 (.not. c_associated(dev, x_d))) then
1028 call neko_error('Inconsistent host/device mapping state in ' // &
1029 'device_unmap')
1030 end if
1031
1032 call device_deassociate(x)
1033 call device_free(x_d)
1034
1035 end subroutine device_unmap_r4
1036
1038 function device_associated_r1(x) result(assoc)
1039 class(*), intent(inout), target :: x(:)
1040 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1041 logical :: assoc
1042
1043 select type (x)
1044 type is (integer)
1045 htbl_ptr_h%ptr = c_loc(x)
1046 type is (integer(i8))
1047 htbl_ptr_h%ptr = c_loc(x)
1048 type is (real)
1049 htbl_ptr_h%ptr = c_loc(x)
1050 type is (double precision)
1051 htbl_ptr_h%ptr = c_loc(x)
1052 class default
1053 call neko_error('Unknown Fortran type')
1054 end select
1055
1056 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1057 assoc = .true.
1058 else
1059 assoc = .false.
1060 end if
1061
1062 end function device_associated_r1
1063
1065 function device_associated_r2(x) result(assoc)
1066 class(*), intent(inout), target :: x(:,:)
1067 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1068 logical :: assoc
1069
1070 select type (x)
1071 type is (integer)
1072 htbl_ptr_h%ptr = c_loc(x)
1073 type is (integer(i8))
1074 htbl_ptr_h%ptr = c_loc(x)
1075 type is (real)
1076 htbl_ptr_h%ptr = c_loc(x)
1077 type is (double precision)
1078 htbl_ptr_h%ptr = c_loc(x)
1079 class default
1080 call neko_error('Unknown Fortran type')
1081 end select
1082
1083 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1084 assoc = .true.
1085 else
1086 assoc = .false.
1087 end if
1088
1089 end function device_associated_r2
1090
1092 function device_associated_r3(x) result(assoc)
1093 class(*), intent(inout), target :: x(:,:,:)
1094 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1095 logical :: assoc
1096
1097 select type (x)
1098 type is (integer)
1099 htbl_ptr_h%ptr = c_loc(x)
1100 type is (integer(i8))
1101 htbl_ptr_h%ptr = c_loc(x)
1102 type is (real)
1103 htbl_ptr_h%ptr = c_loc(x)
1104 type is (double precision)
1105 htbl_ptr_h%ptr = c_loc(x)
1106 class default
1107 call neko_error('Unknown Fortran type')
1108 end select
1109
1110 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1111 assoc = .true.
1112 else
1113 assoc = .false.
1114 end if
1115
1116 end function device_associated_r3
1117
1119 function device_associated_r4(x) result(assoc)
1120 class(*), intent(inout), target :: x(:,:,:,:)
1121 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1122 logical :: assoc
1123
1124 select type (x)
1125 type is (integer)
1126 htbl_ptr_h%ptr = c_loc(x)
1127 type is (integer(i8))
1128 htbl_ptr_h%ptr = c_loc(x)
1129 type is (real)
1130 htbl_ptr_h%ptr = c_loc(x)
1131 type is (double precision)
1132 htbl_ptr_h%ptr = c_loc(x)
1133 class default
1134 call neko_error('Unknown Fortran type')
1135 end select
1136
1137 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1138 assoc = .true.
1139 else
1140 assoc = .false.
1141 end if
1142
1143 end function device_associated_r4
1144
1147 class(*), intent(in), target :: x(:)
1148 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1149 type(c_ptr) :: device_get_ptr_r1
1150
1151 device_get_ptr_r1 = c_null_ptr
1152
1153 select type (x)
1154 type is (integer)
1155 htbl_ptr_h%ptr = c_loc(x)
1156 type is (integer(i8))
1157 htbl_ptr_h%ptr = c_loc(x)
1158 type is (real)
1159 htbl_ptr_h%ptr = c_loc(x)
1160 type is (double precision)
1161 htbl_ptr_h%ptr = c_loc(x)
1162 class default
1163 call neko_error('Unknown Fortran type')
1164 end select
1165
1166 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1167 device_get_ptr_r1 = htbl_ptr_d%ptr
1168 else
1169 call neko_error('Array not associated with device')
1170 end if
1171 end function device_get_ptr_r1
1172
1175 class(*), intent(in), target :: x(:,:)
1176 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1177 type(c_ptr) :: device_get_ptr_r2
1178
1179 device_get_ptr_r2 = c_null_ptr
1180
1181 select type (x)
1182 type is (integer)
1183 htbl_ptr_h%ptr = c_loc(x)
1184 type is (integer(i8))
1185 htbl_ptr_h%ptr = c_loc(x)
1186 type is (real)
1187 htbl_ptr_h%ptr = c_loc(x)
1188 type is (double precision)
1189 htbl_ptr_h%ptr = c_loc(x)
1190 class default
1191 call neko_error('Unknown Fortran type')
1192 end select
1193
1194 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1195 device_get_ptr_r2 = htbl_ptr_d%ptr
1196 else
1197 call neko_error('Array not associated with device')
1198 end if
1199 end function device_get_ptr_r2
1200
1203 class(*), intent(in), target :: x(:,:,:)
1204 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1205 type(c_ptr) :: device_get_ptr_r3
1206
1207 device_get_ptr_r3 = c_null_ptr
1208
1209 select type (x)
1210 type is (integer)
1211 htbl_ptr_h%ptr = c_loc(x)
1212 type is (integer(i8))
1213 htbl_ptr_h%ptr = c_loc(x)
1214 type is (real)
1215 htbl_ptr_h%ptr = c_loc(x)
1216 type is (double precision)
1217 htbl_ptr_h%ptr = c_loc(x)
1218 class default
1219 call neko_error('Unknown Fortran type')
1220 end select
1221
1222 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1223 device_get_ptr_r3 = htbl_ptr_d%ptr
1224 else
1225 call neko_error('Array not associated with device')
1226 end if
1227 end function device_get_ptr_r3
1228
1231 class(*), intent(in), target :: x(:,:,:,:)
1232 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1233 type(c_ptr) :: device_get_ptr_r4
1234
1235 device_get_ptr_r4 = c_null_ptr
1236
1237 select type (x)
1238 type is (integer)
1239 htbl_ptr_h%ptr = c_loc(x)
1240 type is (integer(i8))
1241 htbl_ptr_h%ptr = c_loc(x)
1242 type is (real)
1243 htbl_ptr_h%ptr = c_loc(x)
1244 type is (double precision)
1245 htbl_ptr_h%ptr = c_loc(x)
1246 class default
1247 call neko_error('Unknown Fortran type')
1248 end select
1249
1250 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1251 device_get_ptr_r4 = htbl_ptr_d%ptr
1252 else
1253 call neko_error('Array not associated with device')
1254 end if
1255 end function device_get_ptr_r4
1256
1259#ifdef HAVE_HIP
1260 if (hipdevicesynchronize() .ne. hipsuccess) then
1261 call neko_error('Error during device sync')
1262 end if
1263#elif HAVE_CUDA
1264 if (cudadevicesynchronize() .ne. cudasuccess) then
1265 call neko_error('Error during device sync')
1266 end if
1267#elif HAVE_OPENCL
1268 if (clfinish(glb_cmd_queue) .ne. cl_success) then
1269 call neko_error('Error during device sync')
1270 end if
1271#endif
1272 end subroutine device_sync_device
1273
1275 subroutine device_sync_stream(stream)
1276 type(c_ptr), intent(in) :: stream
1277#ifdef HAVE_HIP
1278 if (hipstreamsynchronize(stream) .ne. hipsuccess) then
1279 call neko_error('Error during stream sync')
1280 end if
1281#elif HAVE_CUDA
1282 if (cudastreamsynchronize(stream) .ne. cudasuccess) then
1283 call neko_error('Error during stream sync')
1284 end if
1285#elif HAVE_OPENCL
1286 if (clfinish(stream) .ne. cl_success) then
1287 call neko_error('Error during stream sync')
1288 end if
1289#endif
1290 end subroutine device_sync_stream
1291
1293 subroutine device_stream_create(stream, flags)
1294 type(c_ptr), intent(inout) :: stream
1295 integer, optional :: flags
1296 integer :: ierr
1297#ifdef HAVE_HIP
1298 if (present(flags)) then
1299 if (hipstreamcreatewithflags(stream, flags) .ne. hipsuccess) then
1300 call neko_error('Error during stream create (w. flags)')
1301 end if
1302 else
1303 if (hipstreamcreate(stream) .ne. hipsuccess) then
1304 call neko_error('Error during stream create')
1305 end if
1306 end if
1307#elif HAVE_CUDA
1308 if (present(flags)) then
1309 if (cudastreamcreatewithflags(stream, flags) .ne. cudasuccess) then
1310 call neko_error('Error during stream create (w. flags)')
1311 end if
1312 else
1313 if (cudastreamcreate(stream) .ne. cudasuccess) then
1314 call neko_error('Error during stream create')
1315 end if
1316 end if
1317#elif HAVE_OPENCL
1318 stream = clcreatecommandqueue(glb_ctx, glb_device_id, 0_i8, ierr)
1319 if (ierr .ne. cl_success) then
1320 call neko_error('Error during stream create')
1321 end if
1322#endif
1323 end subroutine device_stream_create
1324
1326 subroutine device_stream_create_with_priority(stream, flags, prio)
1327 type(c_ptr), intent(inout) :: stream
1328 integer, intent(in) :: flags, prio
1329#ifdef HAVE_HIP
1330 if (hipstreamcreatewithpriority(stream, flags, prio) .ne. hipsuccess) then
1331 call neko_error('Error during stream create (w. priority)')
1332 end if
1333#elif HAVE_CUDA
1334 if (cudastreamcreatewithpriority(stream, flags, prio) .ne. cudasuccess) then
1335 call neko_error('Error during stream create (w. priority)')
1336 end if
1337#elif HAVE_OPENCL
1338 call neko_error('Not implemented yet')
1339#endif
1341
1343 subroutine device_stream_destroy(stream)
1344 type(c_ptr), intent(inout) :: stream
1345#ifdef HAVE_HIP
1346 if (hipstreamdestroy(stream) .ne. hipsuccess) then
1347 call neko_error('Error during stream destroy')
1348 end if
1349#elif HAVE_CUDA
1350 if (cudastreamdestroy(stream) .ne. cudasuccess) then
1351 call neko_error('Error during stream destroy')
1352 end if
1353#elif HAVE_OPENCL
1354 if (clreleasecommandqueue(stream) .ne. cl_success) then
1355 call neko_error('Error during stream destroy')
1356 end if
1357#endif
1358 end subroutine device_stream_destroy
1359
1361 subroutine device_stream_wait_event(stream, event, flags)
1362 type(c_ptr), intent(in) :: stream
1363 type(c_ptr), target, intent(in) :: event
1364 integer :: flags
1365#ifdef HAVE_HIP
1366 if (hipstreamwaitevent(stream, event, flags) .ne. hipsuccess) then
1367 call neko_error('Error during stream sync')
1368 end if
1369#elif HAVE_CUDA
1370 if (cudastreamwaitevent(stream, event, flags) .ne. cudasuccess) then
1371 call neko_error('Error during stream sync')
1372 end if
1373#elif HAVE_OPENCL
1374 if (clenqueuebarrier(stream) .ne. cl_success) then
1375 call neko_error('Error during barrier')
1376 end if
1377 if (clenqueuewaitforevents(stream, 1, c_loc(event)) .ne. cl_success) then
1378 call neko_error('Error during stream sync')
1379 end if
1380#endif
1381 end subroutine device_stream_wait_event
1382
1385#if HAVE_CUDA
1386 if (cudaprofilerstart() .ne. cudasuccess) then
1387 call neko_error('Error starting profiler')
1388 end if
1389#endif
1390 end subroutine device_profiler_start
1391
1394#if HAVE_CUDA
1395 if (cudaprofilerstop() .ne. cudasuccess) then
1396 call neko_error('Error stopping profiler')
1397 end if
1398#endif
1399 end subroutine device_profiler_stop
1400
1402 subroutine device_event_create(event, flags)
1403 type(c_ptr), intent(inout) :: event
1404 integer, optional :: flags
1405 integer :: ierr
1406#ifdef HAVE_HIP
1407 if (present(flags)) then
1408 if (hipeventcreatewithflags(event, flags) .ne. hipsuccess) then
1409 call neko_error('Error during event create (w. flags)')
1410 end if
1411 else
1412 if (hipeventcreate(event) .ne. hipsuccess) then
1413 call neko_error('Error during event create')
1414 end if
1415 end if
1416#elif HAVE_CUDA
1417 if (present(flags)) then
1418 if (cudaeventcreatewithflags(event, flags) .ne. cudasuccess) then
1419 call neko_error('Error during event create (w. flags)')
1420 end if
1421 else
1422 if (cudaeventcreate(event) .ne. cudasuccess) then
1423 call neko_error('Error during event create')
1424 end if
1425 end if
1426#elif HAVE_OPENCL
1427 event = c_null_ptr
1428#endif
1429 end subroutine device_event_create
1430
1432 subroutine device_event_destroy(event)
1433 type(c_ptr), intent(inout) :: event
1434#ifdef HAVE_HIP
1435 if (hipeventdestroy(event) .ne. hipsuccess) then
1436 call neko_error('Error during event destroy')
1437 end if
1438#elif HAVE_CUDA
1439 if (cudaeventdestroy(event) .ne. cudasuccess) then
1440 call neko_error('Error during event destroy')
1441 end if
1442#elif HAVE_OPENCL
1443 event = c_null_ptr
1444#endif
1445 end subroutine device_event_destroy
1446
1448 subroutine device_event_record(event, stream)
1449 type(c_ptr), target, intent(in) :: event
1450 type(c_ptr), intent(in) :: stream
1451#ifdef HAVE_HIP
1452 if (hipeventrecord(event, stream) .ne. hipsuccess) then
1453 call neko_error('Error recording an event')
1454 end if
1455#elif HAVE_CUDA
1456 if (cudaeventrecord(event, stream) .ne. cudasuccess) then
1457 call neko_error('Error recording an event')
1458 end if
1459#elif HAVE_OPENCL
1460 if (clenqueuemarker(stream, c_loc(event)) .ne. cl_success) then
1461 call neko_error('Error recording an event')
1462 end if
1463#endif
1464 end subroutine device_event_record
1465
1467 subroutine device_event_sync(event)
1468 type(c_ptr), target, intent(in) :: event
1469#ifdef HAVE_HIP
1470 if (hipeventsynchronize(event) .ne. hipsuccess) then
1471 call neko_error('Error during event sync')
1472 end if
1473#elif HAVE_CUDA
1474 if (cudaeventsynchronize(event) .ne. cudasuccess) then
1475 call neko_error('Error during event sync')
1476 end if
1477#elif HAVE_OPENCL
1478 if (c_associated(event)) then
1479 if (clwaitforevents(1, c_loc(event)) .ne. cl_success) then
1480 call neko_error('Error during event sync')
1481 end if
1482 end if
1483#endif
1484 end subroutine device_event_sync
1485
1486end module device
double real
Associate a Fortran array to a (allocated) device pointer.
Definition device.F90:89
Check if a Fortran array is assoicated with a device pointer.
Definition device.F90:95
Deassociate a Fortran array from a device pointer.
Definition device.F90:101
Return the device pointer for an associated Fortran array.
Definition device.F90:107
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Synchronize a device or stream.
Definition device.F90:113
Unmap a Fortran array from a device (deassociate and free)
Definition device.F90:83
Fortran CUDA interface.
Definition cuda_intf.F90:34
subroutine cuda_device_name(name)
subroutine cuda_finalize(glb_cmd_queue, aux_cmd_queue)
@ cudamemcpydevicetohost
Definition cuda_intf.F90:54
@ cudamemcpydevicetodevice
Definition cuda_intf.F90:55
@ cudamemcpyhosttodevice
Definition cuda_intf.F90:53
integer function cuda_device_count()
Return the number of avaialble CUDA devices.
subroutine cuda_init(glb_cmd_queue, aux_cmd_queue, strm_high_prio, strm_low_prio)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_record(event, stream)
Record a device event.
Definition device.F90:1449
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1468
subroutine device_associate_r2(x, x_d)
Associate a Fortran rank 2 array to a (allocated) device pointer.
Definition device.F90:604
subroutine, public device_finalize
Definition device.F90:156
integer, parameter, public device_to_device
Definition device.F90:47
type(c_ptr) function device_get_ptr_r4(x)
Return the device pointer for an associated Fortran rank 4 array.
Definition device.F90:1231
type(c_ptr) function device_get_ptr_r1(x)
Return the device pointer for an associated Fortran rank 1 array.
Definition device.F90:1147
integer, public strm_low_prio
Low priority stream setting.
Definition device.F90:68
integer, parameter, public host_to_device
Definition device.F90:47
subroutine device_map_r3(x, x_d, n)
Map a Fortran rank 3 array to a device (allocate and associate)
Definition device.F90:833
type(c_ptr), bind(C), public prf_cmd_queue
Profiling command queue.
Definition device.F90:58
subroutine, private device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
Copy data between host and device.
Definition device.F90:470
logical function device_associated_r3(x)
Check if a Fortran rank 3 array is assoicated with a device pointer.
Definition device.F90:1093
subroutine device_unmap_r4(x, x_d)
Unmap a Fortran rank 4 array from a device (deassociate and free)
Definition device.F90:1002
type(htable_cptr_t), private device_addrtbl
Table of host to device address mappings.
Definition device.F90:118
subroutine, public device_profiler_stop()
Stop device profiling.
Definition device.F90:1394
subroutine device_deassociate_r3(x)
Deassociate a Fortran rank 3 array from a device pointer.
Definition device.F90:727
subroutine, public device_sync_stream(stream)
Synchronize a device stream.
Definition device.F90:1276
type(c_ptr) function device_get_ptr_r3(x)
Return the device pointer for an associated Fortran rank 3 array.
Definition device.F90:1203
subroutine device_unmap_r2(x, x_d)
Unmap a Fortran rank 2 array from a device (deassociate and free)
Definition device.F90:928
subroutine, public device_profiler_start()
Start device profiling.
Definition device.F90:1385
subroutine device_map_r2(x, x_d, n)
Map a Fortran rank 2 array to a device (allocate and associate)
Definition device.F90:804
subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 2 arrays)
Definition device.F90:325
subroutine device_map_r4(x, x_d, n)
Map a Fortran rank 4 array to a device (allocate and associate)
Definition device.F90:862
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:225
integer, parameter, public device_to_host
Definition device.F90:47
subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
Copy data between host and device (or device and device) (c-pointers)
Definition device.F90:441
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1433
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:198
subroutine, public device_stream_create_with_priority(stream, flags, prio)
Create a device stream/command queue with priority.
Definition device.F90:1327
subroutine, public device_stream_create(stream, flags)
Create a device stream/command queue.
Definition device.F90:1294
subroutine device_deassociate_r4(x)
Deassociate a Fortran rank 4 array from a device pointer.
Definition device.F90:751
subroutine device_sync_device()
Synchronize the device.
Definition device.F90:1259
subroutine device_associate_r1(x, x_d)
Associate a Fortran rank 1 array to a (allocated) device pointer.
Definition device.F90:579
subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 4 arrays)
Definition device.F90:401
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
Definition device.F90:1362
subroutine device_map_r1(x, x_d, n)
Map a Fortran rank 1 array to a device (allocate and associate)
Definition device.F90:775
subroutine device_unmap_r1(x, x_d)
Unmap a Fortran rank 1 array from a device (deassociate and free)
Definition device.F90:891
subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 1 arrays)
Definition device.F90:287
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Definition device.F90:51
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1403
integer function, public device_count()
Return the number of available devices.
Definition device.F90:185
subroutine, public device_name(name)
Definition device.F90:172
logical function device_associated_r4(x)
Check if a Fortran rank 4 array is assoicated with a device pointer.
Definition device.F90:1120
logical function device_associated_r2(x)
Check if a Fortran rank 2 array is assoicated with a device pointer.
Definition device.F90:1066
integer, public strm_high_prio
High priority stream setting.
Definition device.F90:65
type(c_ptr), bind(C), public aux_cmd_queue
Aux command queue.
Definition device.F90:54
type(c_ptr) function device_get_ptr_r2(x)
Return the device pointer for an associated Fortran rank 2 array.
Definition device.F90:1175
subroutine device_associate_r4(x, x_d)
Associate a Fortran rank 4 array to a (allocated) device pointer.
Definition device.F90:654
subroutine device_unmap_r3(x, x_d)
Unmap a Fortran rank 3 array from a device (deassociate and free)
Definition device.F90:965
subroutine device_deassociate_r1(x)
Deassociate a Fortran rank 1 array from a device pointer.
Definition device.F90:679
type(c_ptr), bind(C), public glb_cmd_event
Event for the global command queue.
Definition device.F90:62
subroutine device_deassociate_r2(x)
Deassociate a Fortran rank 2 array from a device pointer.
Definition device.F90:703
subroutine, public device_init
Definition device.F90:134
subroutine device_associate_r3(x, x_d)
Associate a Fortran rank 3 array to a (allocated) device pointer.
Definition device.F90:629
logical function device_associated_r1(x)
Check if a Fortran rank 1 array is assoicated with a device pointer.
Definition device.F90:1039
subroutine, public device_memset(x_d, v, s, sync, strm)
Set memory on the device to a value.
Definition device.F90:244
subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 3 arrays)
Definition device.F90:363
subroutine, public device_stream_destroy(stream)
Destroy a device stream/command queue.
Definition device.F90:1344
Fortran HIP interface.
Definition hip_intf.F90:34
subroutine hip_device_name(name)
Definition hip_intf.F90:271
@ hipmemcpydevicetohost
Definition hip_intf.F90:72
@ hipmemcpydevicetodevice
Definition hip_intf.F90:73
@ hipmemcpyhosttodevice
Definition hip_intf.F90:71
subroutine hip_init(glb_cmd_queue, aux_cmd_queue, strm_high_prio, strm_low_prio)
Definition hip_intf.F90:236
subroutine hip_finalize(glb_cmd_queue, aux_cmd_queue)
Definition hip_intf.F90:258
integer function hip_device_count()
Return the number of available HIP devices.
Definition hip_intf.F90:288
Implements a hash table ADT.
Definition htable.f90:52
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public i8
Definition num_types.f90:7
Fortran OpenCL interface.
subroutine opencl_device_name(name)
subroutine opencl_finalize(glb_cmd_queue, aux_cmd_queue, prf_cmd_queue)
integer function opencl_device_count()
Return the number of OpenCL devices.
subroutine opencl_init(glb_cmd_queue, aux_cmd_queue, prf_cmd_queue)
OpenCL JIT program library.
Definition prgm_lib.F90:2
subroutine, public opencl_prgm_lib_release
Definition prgm_lib.F90:128
Utilities.
Definition utils.f90:35
C pointer based hash table.
Definition htable.f90:162