Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
device.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2025, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module device
35 use num_types, only : i8
36 use opencl_intf
37 use cuda_intf
38 use hip_intf
40 use htable, only : htable_cptr_t, h_cptr_t
41 use utils, only : neko_error
43 use, intrinsic :: iso_c_binding
44 implicit none
45 private
46
47 integer, public, parameter :: host_to_device = 1, device_to_host = 2, &
49
51 type(c_ptr), public, bind(c) :: glb_cmd_queue = c_null_ptr
52
54 type(c_ptr), public, bind(c) :: aux_cmd_queue = c_null_ptr
55
57 type(c_ptr), public, bind(c) :: glb_cmd_event
58
60 integer, public :: strm_high_prio
61
63 integer, public :: strm_low_prio
64
69 end interface device_memcpy
70
72 interface device_map
73 module procedure device_map_r1, device_map_r2, &
75 end interface device_map
76
81 end interface device_associate
82
87 end interface device_associated
88
93 end interface device_deassociate
94
99 end interface device_get_ptr
100
102 interface device_sync
103 module procedure device_sync_device, device_sync_stream
104 end interface device_sync
105
108
117
118 private :: device_memcpy_common
119
120contains
121
122 subroutine device_init
123#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
124 call device_addrtbl%init(64)
125
126#ifdef HAVE_HIP
128#elif HAVE_CUDA
130#elif HAVE_OPENCL
132#endif
134#endif
135
136 ! Check the device count against the number of MPI ranks
137 if (neko_bcknd_device .eq. 1) then
138 if (device_count() .ne. 1) then
139 call neko_error('Only one device is supported per MPI rank')
140 end if
141 end if
142 end subroutine device_init
143
145#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
146 call device_addrtbl%free()
147
148#ifdef HAVE_HIP
150#elif HAVE_CUDA
152#elif HAVE_OPENCL
155#endif
157#endif
158 end subroutine device_finalize
159
!> Retrieve the name of the accelerator device.
!! The request is handed to `hip_device_name`, `cuda_device_name` or
!! `opencl_device_name`, whichever backend was selected at compile time.
!! If no backend branch is compiled in, `name` is left untouched.
!! @param[inout] name Character buffer passed on to the backend routine.
subroutine device_name(name)
  character(len=*), intent(inout) :: name

#ifdef HAVE_HIP
  call hip_device_name(name)
#elif HAVE_CUDA
  call cuda_device_name(name)
#elif HAVE_OPENCL
  call opencl_device_name(name)
#endif
end subroutine device_name
171
173 integer function device_count()
174#ifdef HAVE_HIP
176#elif HAVE_CUDA
178#elif HAVE_OPENCL
180#else
181 device_count = 0
182#endif
183 end function device_count
184
!> Allocate `s` bytes of device memory and return the handle in `x_d`.
!! A zero-byte request does not allocate anything: the device is
!! synchronised and `x_d` is set to `c_null_ptr`.
!! Any backend failure is reported through `neko_error`.
!! @param[inout] x_d Device pointer receiving the new allocation.
!! @param[in] s Requested size in bytes.
subroutine device_alloc(x_d, s)
  type(c_ptr), intent(inout) :: x_d
  integer(c_size_t), intent(in) :: s
  integer :: ierr ! status code, only written by the OpenCL branch

  ! Zero-sized request: hand back a null handle instead of calling the backend
  if (s .eq. 0) then
     call device_sync()
     x_d = c_null_ptr
     return
  end if
#ifdef HAVE_HIP
  if (hipmalloc(x_d, s) .ne. hipsuccess) then
     call neko_error('Memory allocation on device failed')
  end if
#elif HAVE_CUDA
  if (cudamalloc(x_d, s) .ne. cudasuccess) then
     call neko_error('Memory allocation on device failed')
  end if
#elif HAVE_OPENCL
  x_d = clcreatebuffer(glb_ctx, cl_mem_read_write, s, c_null_ptr, ierr)
  if (ierr .ne. cl_success) then
     call neko_error('Memory allocation on device failed')
  end if
#endif
end subroutine device_alloc
211
!> Release the device memory referenced by `x_d` and reset the handle.
!! A handle that is not associated (for instance the `c_null_ptr` produced
!! by a zero-byte `device_alloc`) is accepted and treated as a no-op, so
!! that no backend is asked to release a null object.
!! Any backend failure is reported through `neko_error`.
!! @param[inout] x_d Device pointer to free; `c_null_ptr` on return.
subroutine device_free(x_d)
  type(c_ptr), intent(inout) :: x_d

  ! Nothing was allocated for a null handle; the handle is already null
  if (.not. c_associated(x_d)) return

#ifdef HAVE_HIP
  if (hipfree(x_d) .ne. hipsuccess) then
     call neko_error('Memory deallocation on device failed')
  end if
#elif HAVE_CUDA
  if (cudafree(x_d) .ne. cudasuccess) then
     call neko_error('Memory deallocation on device failed')
  end if
#elif HAVE_OPENCL
  if (clreleasememobject(x_d) .ne. cl_success) then
     call neko_error('Memory deallocation on device failed')
  end if
#endif
  x_d = c_null_ptr
end subroutine device_free
230
!> Enqueue a fill of the device buffer `x_d` with the value `v`.
!! The operation is issued on `strm` when given, otherwise on
!! `glb_cmd_queue`. When `sync` is present and true the routine waits on
!! that stream before returning; by default it does not wait.
!! @param[inout] x_d Device pointer to fill.
!! @param[in] v Fill value handed to the backend.
!! @param[in] s Size argument handed to the backend.
!! @param[in] sync Optional, wait for completion (default `.false.`).
!! @param[in] strm Optional stream/command queue to use.
subroutine device_memset(x_d, v, s, sync, strm)
  type(c_ptr), intent(inout) :: x_d
  integer(c_int), target, value :: v
  integer(c_size_t), intent(in) :: s
  logical, optional :: sync
  type(c_ptr), optional :: strm
  type(c_ptr) :: queue
  logical :: wait_for_completion

  ! Defaults first, then override with whatever the caller supplied
  wait_for_completion = .false.
  if (present(sync)) wait_for_completion = sync

  queue = glb_cmd_queue
  if (present(strm)) queue = strm

#ifdef HAVE_HIP
  if (hipmemsetasync(x_d, v, s, queue) .ne. hipsuccess) &
       call neko_error('Device memset async failed')
#elif HAVE_CUDA
  if (cudamemsetasync(x_d, v, s, queue) .ne. cudasuccess) &
       call neko_error('Device memset async failed')
#elif HAVE_OPENCL
  ! NOTE(review): the fill pattern here is the whole c_int (c_sizeof(v)
  ! bytes); confirm this matches the other backends for non-zero v.
  if (clenqueuefillbuffer(queue, x_d, c_loc(v), c_sizeof(v), 0_i8, &
       s, 0, c_null_ptr, c_null_ptr) .ne. cl_success) &
       call neko_error('Device memset async failed')
#endif

  if (wait_for_completion) call device_sync_stream(queue)

end subroutine device_memset
273
!> Copy a rank 1 Fortran array between host and device.
!! The byte count is `n` times a fixed element width (4 for `integer` and
!! `real`, 8 for `integer(i8)` and `double precision`); any other type is
!! rejected through `neko_error`. The transfer itself is delegated to
!! `device_memcpy_common`.
!! @param[inout] x Host array.
!! @param[inout] x_d Device pointer.
!! @param[in] n Number of elements to transfer.
!! @param[in] dir Transfer direction, forwarded unchanged.
!! @param sync Whether to wait for the transfer, forwarded unchanged.
!! @param[in] strm Optional stream; `glb_cmd_queue` is used when absent.
subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
  integer, intent(in) :: n
  class(*), intent(inout), target :: x(:)
  type(c_ptr), intent(inout) :: x_d
  integer, intent(in), value :: dir
  logical :: sync
  type(c_ptr), optional :: strm
  type(c_ptr) :: host_ptr, queue
  integer(c_size_t) :: nbytes

  ! Resolve the dynamic type to obtain the host address and byte count
  select type (x)
  type is (integer)
     host_ptr = c_loc(x)
     nbytes = int(n, c_size_t) * 4_c_size_t
  type is (integer(i8))
     host_ptr = c_loc(x)
     nbytes = int(n, c_size_t) * 8_c_size_t
  type is (real)
     host_ptr = c_loc(x)
     nbytes = int(n, c_size_t) * 4_c_size_t
  type is (double precision)
     host_ptr = c_loc(x)
     nbytes = int(n, c_size_t) * 8_c_size_t
  class default
     call neko_error('Unknown Fortran type')
  end select

  queue = glb_cmd_queue
  if (present(strm)) queue = strm

  call device_memcpy_common(host_ptr, x_d, nbytes, dir, sync, queue)

end subroutine device_memcpy_r1
311
!> Copy a rank 2 Fortran array between host and device.
!! The byte count is `n` times a fixed element width (4 for `integer` and
!! `real`, 8 for `integer(i8)` and `double precision`); any other type is
!! rejected through `neko_error`. The transfer itself is delegated to
!! `device_memcpy_common`.
!! @param[inout] x Host array.
!! @param[inout] x_d Device pointer.
!! @param[in] n Number of elements to transfer.
!! @param[in] dir Transfer direction, forwarded unchanged.
!! @param sync Whether to wait for the transfer, forwarded unchanged.
!! @param[in] strm Optional stream; `glb_cmd_queue` is used when absent.
subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
  integer, intent(in) :: n
  class(*), intent(inout), target :: x(:,:)
  type(c_ptr), intent(inout) :: x_d
  integer, intent(in), value :: dir
  logical :: sync
  type(c_ptr), optional :: strm
  type(c_ptr) :: host_ptr, queue
  integer(c_size_t) :: nbytes

  ! Resolve the dynamic type to obtain the host address and byte count
  select type (x)
  type is (integer)
     host_ptr = c_loc(x)
     nbytes = int(n, c_size_t) * 4_c_size_t
  type is (integer(i8))
     host_ptr = c_loc(x)
     nbytes = int(n, c_size_t) * 8_c_size_t
  type is (real)
     host_ptr = c_loc(x)
     nbytes = int(n, c_size_t) * 4_c_size_t
  type is (double precision)
     host_ptr = c_loc(x)
     nbytes = int(n, c_size_t) * 8_c_size_t
  class default
     call neko_error('Unknown Fortran type')
  end select

  queue = glb_cmd_queue
  if (present(strm)) queue = strm

  call device_memcpy_common(host_ptr, x_d, nbytes, dir, sync, queue)

end subroutine device_memcpy_r2
349
!> Copy a rank 3 Fortran array between host and device.
!! The byte count is `n` times a fixed element width (4 for `integer` and
!! `real`, 8 for `integer(i8)` and `double precision`); any other type is
!! rejected through `neko_error`. The transfer itself is delegated to
!! `device_memcpy_common`.
!! @param[inout] x Host array.
!! @param[inout] x_d Device pointer.
!! @param[in] n Number of elements to transfer.
!! @param[in] dir Transfer direction, forwarded unchanged.
!! @param sync Whether to wait for the transfer, forwarded unchanged.
!! @param[in] strm Optional stream; `glb_cmd_queue` is used when absent.
subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
  integer, intent(in) :: n
  class(*), intent(inout), target :: x(:,:,:)
  type(c_ptr), intent(inout) :: x_d
  integer, intent(in), value :: dir
  logical :: sync
  type(c_ptr), optional :: strm
  type(c_ptr) :: host_ptr, queue
  integer(c_size_t) :: nbytes

  ! Resolve the dynamic type to obtain the host address and byte count
  select type (x)
  type is (integer)
     host_ptr = c_loc(x)
     nbytes = int(n, c_size_t) * 4_c_size_t
  type is (integer(i8))
     host_ptr = c_loc(x)
     nbytes = int(n, c_size_t) * 8_c_size_t
  type is (real)
     host_ptr = c_loc(x)
     nbytes = int(n, c_size_t) * 4_c_size_t
  type is (double precision)
     host_ptr = c_loc(x)
     nbytes = int(n, c_size_t) * 8_c_size_t
  class default
     call neko_error('Unknown Fortran type')
  end select

  queue = glb_cmd_queue
  if (present(strm)) queue = strm

  call device_memcpy_common(host_ptr, x_d, nbytes, dir, sync, queue)

end subroutine device_memcpy_r3
387
!> Copy a rank 4 Fortran array between host and device.
!! The byte count is `n` times a fixed element width (4 for `integer` and
!! `real`, 8 for `integer(i8)` and `double precision`); any other type is
!! rejected through `neko_error`. The transfer itself is delegated to
!! `device_memcpy_common`.
!! @param[inout] x Host array.
!! @param[inout] x_d Device pointer.
!! @param[in] n Number of elements to transfer.
!! @param[in] dir Transfer direction, forwarded unchanged.
!! @param sync Whether to wait for the transfer, forwarded unchanged.
!! @param[in] strm Optional stream; `glb_cmd_queue` is used when absent.
subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
  integer, intent(in) :: n
  class(*), intent(inout), target :: x(:,:,:,:)
  type(c_ptr), intent(inout) :: x_d
  integer, intent(in), value :: dir
  logical :: sync
  type(c_ptr), optional :: strm
  type(c_ptr) :: host_ptr, queue
  integer(c_size_t) :: nbytes

  ! Resolve the dynamic type to obtain the host address and byte count
  select type (x)
  type is (integer)
     host_ptr = c_loc(x)
     nbytes = int(n, c_size_t) * 4_c_size_t
  type is (integer(i8))
     host_ptr = c_loc(x)
     nbytes = int(n, c_size_t) * 8_c_size_t
  type is (real)
     host_ptr = c_loc(x)
     nbytes = int(n, c_size_t) * 4_c_size_t
  type is (double precision)
     host_ptr = c_loc(x)
     nbytes = int(n, c_size_t) * 8_c_size_t
  class default
     call neko_error('Unknown Fortran type')
  end select

  queue = glb_cmd_queue
  if (present(strm)) queue = strm

  call device_memcpy_common(host_ptr, x_d, nbytes, dir, sync, queue)

end subroutine device_memcpy_r4
425
!> Copy `s` bytes between two raw C pointers.
!! `dst` is forwarded as the `ptr_h` argument and `src` as the `x_d`
!! argument of `device_memcpy_common`, which decides the actual source and
!! destination from `dir`.
!! @param[inout] dst Pointer forwarded as `ptr_h`.
!! @param[inout] src Pointer forwarded as `x_d`.
!! @param[in] s Number of bytes to transfer.
!! @param[in] dir Transfer direction, forwarded unchanged.
!! @param[in] sync Optional, wait for completion (default `.false.`).
!! @param[in] strm Optional stream; `glb_cmd_queue` is used when absent.
subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
  type(c_ptr), intent(inout) :: dst
  type(c_ptr), intent(inout) :: src
  integer(c_size_t), intent(in) :: s
  integer, intent(in), value :: dir
  logical, optional :: sync
  type(c_ptr), optional :: strm
  type(c_ptr) :: queue
  logical :: wait_for_completion

  ! Defaults first, then override with whatever the caller supplied
  queue = glb_cmd_queue
  if (present(strm)) queue = strm

  wait_for_completion = .false.
  if (present(sync)) wait_for_completion = sync

  call device_memcpy_common(dst, src, s, dir, wait_for_completion, queue)

end subroutine device_memcpy_cptr
454
!> Shared implementation behind all `device_memcpy` variants.
!! Depending on `dir` the data moves as follows:
!!  - `host_to_device`:   from `ptr_h` to `x_d`
!!  - `device_to_host`:   from `x_d` to `ptr_h`
!!  - `device_to_device`: from `x_d` to `ptr_h` (both are device handles)
!! Any other value of `dir`, or a backend failure, is reported through
!! `neko_error`. A zero-byte request transfers nothing but still honours
!! `sync_device`.
!! @param[inout] ptr_h Host pointer (destination handle for device-to-device).
!! @param[inout] x_d Device pointer (source handle for device-to-device).
!! @param[in] s Number of bytes to transfer.
!! @param[in] dir Transfer direction.
!! @param[in] sync_device Wait for the transfer to finish before returning.
!! @param[inout] stream Stream / command queue the transfer is issued on.
subroutine device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
  type(c_ptr), intent(inout) :: ptr_h
  type(c_ptr), intent(inout) :: x_d
  integer(c_size_t), intent(in) :: s
  integer, intent(in), value :: dir
  logical, intent(in) :: sync_device
  type(c_ptr), intent(inout) :: stream

  ! Nothing to transfer; a requested synchronisation is still performed
  if (s .eq. 0) then
     if (sync_device) then
        call device_sync_stream(stream)
     end if
     return
  end if

#ifdef HAVE_HIP
  if (dir .eq. host_to_device) then
     if (hipmemcpyasync(x_d, ptr_h, s, &
          hipmemcpyhosttodevice, stream) .ne. hipsuccess) then
        call neko_error('Device memcpy async (host-to-device) failed')
     end if
  else if (dir .eq. device_to_host) then
     if (hipmemcpyasync(ptr_h, x_d, s, &
          hipmemcpydevicetohost, stream) .ne. hipsuccess) then
        call neko_error('Device memcpy async (device-to-host) failed')
     end if
  else if (dir .eq. device_to_device) then
     if (hipmemcpyasync(ptr_h, x_d, s, hipmemcpydevicetodevice, stream) &
          .ne. hipsuccess) then
        call neko_error('Device memcpy async (device-to-device) failed')
     end if
  else
     call neko_error('Device memcpy failed (invalid direction)')
  end if
  if (sync_device) then
     call device_sync_stream(stream)
  end if
#elif HAVE_CUDA
  if (dir .eq. host_to_device) then
     if (cudamemcpyasync(x_d, ptr_h, s, cudamemcpyhosttodevice, stream) &
          .ne. cudasuccess) then
        call neko_error('Device memcpy async (host-to-device) failed')
     end if
  else if (dir .eq. device_to_host) then
     if (cudamemcpyasync(ptr_h, x_d, s, cudamemcpydevicetohost, stream) &
          .ne. cudasuccess) then
        call neko_error('Device memcpy async (device-to-host) failed')
     end if
  else if (dir .eq. device_to_device) then
     if (cudamemcpyasync(ptr_h, x_d, s, cudamemcpydevicetodevice, stream) &
          .ne. cudasuccess) then
        call neko_error('Device memcpy async (device-to-device) failed')
     end if
  else
     call neko_error('Device memcpy failed (invalid direction)')
  end if
  if (sync_device) then
     call device_sync_stream(stream)
  end if
#elif HAVE_OPENCL
  if (sync_device) then
     ! Blocking variants: reads and writes wait via the cl_true flag
     if (dir .eq. host_to_device) then
        if (clenqueuewritebuffer(stream, x_d, cl_true, 0_i8, s, &
             ptr_h, 0, c_null_ptr, c_null_ptr) &
             .ne. cl_success) then
           call neko_error('Device memcpy (host-to-device) failed')
        end if
     else if (dir .eq. device_to_host) then
        if (clenqueuereadbuffer(stream, x_d, cl_true, 0_i8, s, ptr_h, &
             0, c_null_ptr, c_null_ptr) &
             .ne. cl_success) then
           call neko_error('Device memcpy (device-to-host) failed')
        end if
     else if (dir .eq. device_to_device) then
        if (clenqueuecopybuffer(stream, x_d, ptr_h, 0_i8, 0_i8, s, &
             0, c_null_ptr, c_null_ptr) &
             .ne. cl_success) then
           call neko_error('Device memcpy (device-to-device) failed')
        end if
        ! The buffer copy takes no blocking flag, so wait on the queue
        ! explicitly to honour the requested synchronisation
        call device_sync_stream(stream)
     else
        call neko_error('Device memcpy failed (invalid direction)')
     end if
  else
     ! Non-blocking variants: the commands are only enqueued
     if (dir .eq. host_to_device) then
        if (clenqueuewritebuffer(stream, x_d, cl_false, 0_i8, s, &
             ptr_h, 0, c_null_ptr, c_null_ptr) &
             .ne. cl_success) then
           call neko_error('Device memcpy (host-to-device) failed')
        end if
     else if (dir .eq. device_to_host) then
        if (clenqueuereadbuffer(stream, x_d, cl_false, 0_i8, s, ptr_h,&
             0, c_null_ptr, c_null_ptr) &
             .ne. cl_success) then
           call neko_error('Device memcpy (device-to-host) failed')
        end if
     else if (dir .eq. device_to_device) then
        if (clenqueuecopybuffer(stream, x_d, ptr_h, 0_i8, 0_i8, s, &
             0, c_null_ptr, c_null_ptr) &
             .ne. cl_success) then
           call neko_error('Device memcpy (device-to-device) failed')
        end if
     else
        call neko_error('Device memcpy failed (invalid direction)')
     end if
  end if
#endif
end subroutine device_memcpy_common
565
!> Register a rank 1 Fortran array together with its device pointer.
!! The host address of `x` is stored as key and `x_d` as value in
!! `device_addrtbl`. Only `integer`, `integer(i8)`, `real` and
!! `double precision` arrays are accepted; other types end in `neko_error`.
!! @param[inout] x Host array.
!! @param[inout] x_d Device pointer to associate with `x`.
subroutine device_associate_r1(x, x_d)
  class(*), intent(inout), target :: x(:)
  type(c_ptr), intent(inout) :: x_d
  type(h_cptr_t) :: host_key, dev_val

  dev_val%ptr = x_d

  ! c_loc needs the resolved type, hence one branch per supported type
  select type (x)
  type is (integer)
     host_key%ptr = c_loc(x)
  type is (integer(i8))
     host_key%ptr = c_loc(x)
  type is (real)
     host_key%ptr = c_loc(x)
  type is (double precision)
     host_key%ptr = c_loc(x)
  class default
     call neko_error('Unknown Fortran type')
  end select

  call device_addrtbl%set(host_key, dev_val)

end subroutine device_associate_r1
590
!> Register a rank 2 Fortran array together with its device pointer.
!! The host address of `x` is stored as key and `x_d` as value in
!! `device_addrtbl`. Only `integer`, `integer(i8)`, `real` and
!! `double precision` arrays are accepted; other types end in `neko_error`.
!! @param[inout] x Host array.
!! @param[inout] x_d Device pointer to associate with `x`.
subroutine device_associate_r2(x, x_d)
  class(*), intent(inout), target :: x(:,:)
  type(c_ptr), intent(inout) :: x_d
  type(h_cptr_t) :: host_key, dev_val

  dev_val%ptr = x_d

  ! c_loc needs the resolved type, hence one branch per supported type
  select type (x)
  type is (integer)
     host_key%ptr = c_loc(x)
  type is (integer(i8))
     host_key%ptr = c_loc(x)
  type is (real)
     host_key%ptr = c_loc(x)
  type is (double precision)
     host_key%ptr = c_loc(x)
  class default
     call neko_error('Unknown Fortran type')
  end select

  call device_addrtbl%set(host_key, dev_val)

end subroutine device_associate_r2
615
!> Register a rank 3 Fortran array together with its device pointer.
!! The host address of `x` is stored as key and `x_d` as value in
!! `device_addrtbl`. Only `integer`, `integer(i8)`, `real` and
!! `double precision` arrays are accepted; other types end in `neko_error`.
!! @param[inout] x Host array.
!! @param[inout] x_d Device pointer to associate with `x`.
subroutine device_associate_r3(x, x_d)
  class(*), intent(inout), target :: x(:,:,:)
  type(c_ptr), intent(inout) :: x_d
  type(h_cptr_t) :: host_key, dev_val

  dev_val%ptr = x_d

  ! c_loc needs the resolved type, hence one branch per supported type
  select type (x)
  type is (integer)
     host_key%ptr = c_loc(x)
  type is (integer(i8))
     host_key%ptr = c_loc(x)
  type is (real)
     host_key%ptr = c_loc(x)
  type is (double precision)
     host_key%ptr = c_loc(x)
  class default
     call neko_error('Unknown Fortran type')
  end select

  call device_addrtbl%set(host_key, dev_val)

end subroutine device_associate_r3
640
!> Register a rank 4 Fortran array together with its device pointer.
!! The host address of `x` is stored as key and `x_d` as value in
!! `device_addrtbl`. Only `integer`, `integer(i8)`, `real` and
!! `double precision` arrays are accepted; other types end in `neko_error`.
!! @param[inout] x Host array.
!! @param[inout] x_d Device pointer to associate with `x`.
subroutine device_associate_r4(x, x_d)
  class(*), intent(inout), target :: x(:,:,:,:)
  type(c_ptr), intent(inout) :: x_d
  type(h_cptr_t) :: host_key, dev_val

  dev_val%ptr = x_d

  ! c_loc needs the resolved type, hence one branch per supported type
  select type (x)
  type is (integer)
     host_key%ptr = c_loc(x)
  type is (integer(i8))
     host_key%ptr = c_loc(x)
  type is (real)
     host_key%ptr = c_loc(x)
  type is (double precision)
     host_key%ptr = c_loc(x)
  class default
     call neko_error('Unknown Fortran type')
  end select

  call device_addrtbl%set(host_key, dev_val)

end subroutine device_associate_r4
665
668 class(*), intent(inout), target :: x(:)
669 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
670
671 select type (x)
672 type is (integer)
673 htbl_ptr_h%ptr = c_loc(x)
674 type is (integer(i8))
675 htbl_ptr_h%ptr = c_loc(x)
676 type is (real)
677 htbl_ptr_h%ptr = c_loc(x)
678 type is (double precision)
679 htbl_ptr_h%ptr = c_loc(x)
680 class default
681 call neko_error('Unknown Fortran type')
682 end select
683
684 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
685 call device_addrtbl%remove(htbl_ptr_h)
686 end if
687
688 end subroutine device_deassociate_r1
689
692 class(*), intent(inout), target :: x(:,:)
693 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
694
695 select type (x)
696 type is (integer)
697 htbl_ptr_h%ptr = c_loc(x)
698 type is (integer(i8))
699 htbl_ptr_h%ptr = c_loc(x)
700 type is (real)
701 htbl_ptr_h%ptr = c_loc(x)
702 type is (double precision)
703 htbl_ptr_h%ptr = c_loc(x)
704 class default
705 call neko_error('Unknown Fortran type')
706 end select
707
708 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
709 call device_addrtbl%remove(htbl_ptr_h)
710 end if
711
712 end subroutine device_deassociate_r2
713
716 class(*), intent(inout), target :: x(:,:,:)
717 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
718
719 select type (x)
720 type is (integer)
721 htbl_ptr_h%ptr = c_loc(x)
722 type is (integer(i8))
723 htbl_ptr_h%ptr = c_loc(x)
724 type is (real)
725 htbl_ptr_h%ptr = c_loc(x)
726 type is (double precision)
727 htbl_ptr_h%ptr = c_loc(x)
728 class default
729 call neko_error('Unknown Fortran type')
730 end select
731
732 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
733 call device_addrtbl%remove(htbl_ptr_h)
734 end if
735
736 end subroutine device_deassociate_r3
737
740 class(*), intent(inout), target :: x(:,:,:,:)
741 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
742
743 select type (x)
744 type is (integer)
745 htbl_ptr_h%ptr = c_loc(x)
746 type is (integer(i8))
747 htbl_ptr_h%ptr = c_loc(x)
748 type is (real)
749 htbl_ptr_h%ptr = c_loc(x)
750 type is (double precision)
751 htbl_ptr_h%ptr = c_loc(x)
752 class default
753 call neko_error('Unknown Fortran type')
754 end select
755
756 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
757 call device_addrtbl%remove(htbl_ptr_h)
758 end if
759
760 end subroutine device_deassociate_r4
761
!> Map a rank 1 Fortran array to the device (allocate and associate).
!! Fails through `neko_error` if `x_d` is already associated or if the
!! type of `x` is not one of `integer`, `integer(i8)`, `real` or
!! `double precision`. The allocation size is `n` times a fixed element
!! width (4 or 8 bytes).
!! @param[inout] x Host array.
!! @param[inout] x_d Device pointer, must be unassociated on entry.
!! @param[in] n Number of elements to allocate room for.
subroutine device_map_r1(x, x_d, n)
  integer, intent(in) :: n
  class(*), intent(inout), target :: x(:)
  type(c_ptr), intent(inout) :: x_d
  integer(c_size_t) :: nbytes

  if (c_associated(x_d)) call neko_error('Device pointer already associated')

  select type (x)
  type is (integer)
     nbytes = int(n, c_size_t) * 4_c_size_t
  type is (integer(i8))
     nbytes = int(n, c_size_t) * 8_c_size_t
  type is (real)
     nbytes = int(n, c_size_t) * 4_c_size_t
  type is (double precision)
     nbytes = int(n, c_size_t) * 8_c_size_t
  class default
     call neko_error('Unknown Fortran type')
  end select

  ! Allocate first, then record the host/device pair
  call device_alloc(x_d, nbytes)
  call device_associate(x, x_d)

end subroutine device_map_r1
790
!> Map a rank 2 Fortran array to the device (allocate and associate).
!! Fails through `neko_error` if `x_d` is already associated or if the
!! type of `x` is not one of `integer`, `integer(i8)`, `real` or
!! `double precision`. The allocation size is `n` times a fixed element
!! width (4 or 8 bytes).
!! @param[inout] x Host array.
!! @param[inout] x_d Device pointer, must be unassociated on entry.
!! @param[in] n Number of elements to allocate room for.
subroutine device_map_r2(x, x_d, n)
  integer, intent(in) :: n
  class(*), intent(inout), target :: x(:,:)
  type(c_ptr), intent(inout) :: x_d
  integer(c_size_t) :: nbytes

  if (c_associated(x_d)) call neko_error('Device pointer already associated')

  select type (x)
  type is (integer)
     nbytes = int(n, c_size_t) * 4_c_size_t
  type is (integer(i8))
     nbytes = int(n, c_size_t) * 8_c_size_t
  type is (real)
     nbytes = int(n, c_size_t) * 4_c_size_t
  type is (double precision)
     nbytes = int(n, c_size_t) * 8_c_size_t
  class default
     call neko_error('Unknown Fortran type')
  end select

  ! Allocate first, then record the host/device pair
  call device_alloc(x_d, nbytes)
  call device_associate(x, x_d)

end subroutine device_map_r2
819
!> Map a rank 3 Fortran array to the device (allocate and associate).
!! Fails through `neko_error` if `x_d` is already associated or if the
!! type of `x` is not one of `integer`, `integer(i8)`, `real` or
!! `double precision`. The allocation size is `n` times a fixed element
!! width (4 or 8 bytes).
!! @param[inout] x Host array.
!! @param[inout] x_d Device pointer, must be unassociated on entry.
!! @param[in] n Number of elements to allocate room for.
subroutine device_map_r3(x, x_d, n)
  integer, intent(in) :: n
  class(*), intent(inout), target :: x(:,:,:)
  type(c_ptr), intent(inout) :: x_d
  integer(c_size_t) :: nbytes

  if (c_associated(x_d)) call neko_error('Device pointer already associated')

  select type (x)
  type is (integer)
     nbytes = int(n, c_size_t) * 4_c_size_t
  type is (integer(i8))
     nbytes = int(n, c_size_t) * 8_c_size_t
  type is (real)
     nbytes = int(n, c_size_t) * 4_c_size_t
  type is (double precision)
     nbytes = int(n, c_size_t) * 8_c_size_t
  class default
     call neko_error('Unknown Fortran type')
  end select

  ! Allocate first, then record the host/device pair
  call device_alloc(x_d, nbytes)
  call device_associate(x, x_d)

end subroutine device_map_r3
848
!> Map a rank 4 Fortran array to the device (allocate and associate).
!! Fails through `neko_error` if `x_d` is already associated or if the
!! type of `x` is not one of `integer`, `integer(i8)`, `real` or
!! `double precision`. The allocation size is `n` times a fixed element
!! width (4 or 8 bytes).
!! @param[inout] x Host array.
!! @param[inout] x_d Device pointer, must be unassociated on entry.
!! @param[in] n Number of elements to allocate room for.
subroutine device_map_r4(x, x_d, n)
  integer, intent(in) :: n
  class(*), intent(inout), target :: x(:,:,:,:)
  type(c_ptr), intent(inout) :: x_d
  integer(c_size_t) :: nbytes

  if (c_associated(x_d)) call neko_error('Device pointer already associated')

  select type (x)
  type is (integer)
     nbytes = int(n, c_size_t) * 4_c_size_t
  type is (integer(i8))
     nbytes = int(n, c_size_t) * 8_c_size_t
  type is (real)
     nbytes = int(n, c_size_t) * 4_c_size_t
  type is (double precision)
     nbytes = int(n, c_size_t) * 8_c_size_t
  class default
     call neko_error('Unknown Fortran type')
  end select

  ! Allocate first, then record the host/device pair
  call device_alloc(x_d, nbytes)
  call device_associate(x, x_d)

end subroutine device_map_r4
877
!> Check whether a rank 1 Fortran array has an entry in `device_addrtbl`.
!! Only `integer`, `integer(i8)`, `real` and `double precision` arrays are
!! accepted; other types end in `neko_error`.
!! @param[inout] x Host array to look up.
!! @return `.true.` when the table lookup for the host address of `x`
!! returns 0, `.false.` otherwise.
function device_associated_r1(x) result(assoc)
  class(*), intent(inout), target :: x(:)
  type(h_cptr_t) :: host_key, dev_val
  logical :: assoc

  ! c_loc needs the resolved type, hence one branch per supported type
  select type (x)
  type is (integer)
     host_key%ptr = c_loc(x)
  type is (integer(i8))
     host_key%ptr = c_loc(x)
  type is (real)
     host_key%ptr = c_loc(x)
  type is (double precision)
     host_key%ptr = c_loc(x)
  class default
     call neko_error('Unknown Fortran type')
  end select

  assoc = (device_addrtbl%get(host_key, dev_val) .eq. 0)

end function device_associated_r1
904
!> Check whether a rank 2 Fortran array has an entry in `device_addrtbl`.
!! Only `integer`, `integer(i8)`, `real` and `double precision` arrays are
!! accepted; other types end in `neko_error`.
!! @param[inout] x Host array to look up.
!! @return `.true.` when the table lookup for the host address of `x`
!! returns 0, `.false.` otherwise.
function device_associated_r2(x) result(assoc)
  class(*), intent(inout), target :: x(:,:)
  type(h_cptr_t) :: host_key, dev_val
  logical :: assoc

  ! c_loc needs the resolved type, hence one branch per supported type
  select type (x)
  type is (integer)
     host_key%ptr = c_loc(x)
  type is (integer(i8))
     host_key%ptr = c_loc(x)
  type is (real)
     host_key%ptr = c_loc(x)
  type is (double precision)
     host_key%ptr = c_loc(x)
  class default
     call neko_error('Unknown Fortran type')
  end select

  assoc = (device_addrtbl%get(host_key, dev_val) .eq. 0)

end function device_associated_r2
931
!> Check whether a rank 3 Fortran array has an entry in `device_addrtbl`.
!! Only `integer`, `integer(i8)`, `real` and `double precision` arrays are
!! accepted; other types end in `neko_error`.
!! @param[inout] x Host array to look up.
!! @return `.true.` when the table lookup for the host address of `x`
!! returns 0, `.false.` otherwise.
function device_associated_r3(x) result(assoc)
  class(*), intent(inout), target :: x(:,:,:)
  type(h_cptr_t) :: host_key, dev_val
  logical :: assoc

  ! c_loc needs the resolved type, hence one branch per supported type
  select type (x)
  type is (integer)
     host_key%ptr = c_loc(x)
  type is (integer(i8))
     host_key%ptr = c_loc(x)
  type is (real)
     host_key%ptr = c_loc(x)
  type is (double precision)
     host_key%ptr = c_loc(x)
  class default
     call neko_error('Unknown Fortran type')
  end select

  assoc = (device_addrtbl%get(host_key, dev_val) .eq. 0)

end function device_associated_r3
958
!> Check whether a rank 4 Fortran array has an entry in `device_addrtbl`.
!! Only `integer`, `integer(i8)`, `real` and `double precision` arrays are
!! accepted; other types end in `neko_error`.
!! @param[inout] x Host array to look up.
!! @return `.true.` when the table lookup for the host address of `x`
!! returns 0, `.false.` otherwise.
function device_associated_r4(x) result(assoc)
  class(*), intent(inout), target :: x(:,:,:,:)
  type(h_cptr_t) :: host_key, dev_val
  logical :: assoc

  ! c_loc needs the resolved type, hence one branch per supported type
  select type (x)
  type is (integer)
     host_key%ptr = c_loc(x)
  type is (integer(i8))
     host_key%ptr = c_loc(x)
  type is (real)
     host_key%ptr = c_loc(x)
  type is (double precision)
     host_key%ptr = c_loc(x)
  class default
     call neko_error('Unknown Fortran type')
  end select

  assoc = (device_addrtbl%get(host_key, dev_val) .eq. 0)

end function device_associated_r4
985
988 class(*), intent(in), target :: x(:)
989 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
990 type(c_ptr) :: device_get_ptr_r1
991
992 device_get_ptr_r1 = c_null_ptr
993
994 select type (x)
995 type is (integer)
996 htbl_ptr_h%ptr = c_loc(x)
997 type is (integer(i8))
998 htbl_ptr_h%ptr = c_loc(x)
999 type is (real)
1000 htbl_ptr_h%ptr = c_loc(x)
1001 type is (double precision)
1002 htbl_ptr_h%ptr = c_loc(x)
1003 class default
1004 call neko_error('Unknown Fortran type')
1005 end select
1006
1007 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1008 device_get_ptr_r1 = htbl_ptr_d%ptr
1009 else
1010 call neko_error('Array not associated with device')
1011 end if
1012 end function device_get_ptr_r1
1013
1016 class(*), intent(in), target :: x(:,:)
1017 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1018 type(c_ptr) :: device_get_ptr_r2
1019
1020 device_get_ptr_r2 = c_null_ptr
1021
1022 select type (x)
1023 type is (integer)
1024 htbl_ptr_h%ptr = c_loc(x)
1025 type is (integer(i8))
1026 htbl_ptr_h%ptr = c_loc(x)
1027 type is (real)
1028 htbl_ptr_h%ptr = c_loc(x)
1029 type is (double precision)
1030 htbl_ptr_h%ptr = c_loc(x)
1031 class default
1032 call neko_error('Unknown Fortran type')
1033 end select
1034
1035 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1036 device_get_ptr_r2 = htbl_ptr_d%ptr
1037 else
1038 call neko_error('Array not associated with device')
1039 end if
1040 end function device_get_ptr_r2
1041
1044 class(*), intent(in), target :: x(:,:,:)
1045 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1046 type(c_ptr) :: device_get_ptr_r3
1047
1048 device_get_ptr_r3 = c_null_ptr
1049
1050 select type (x)
1051 type is (integer)
1052 htbl_ptr_h%ptr = c_loc(x)
1053 type is (integer(i8))
1054 htbl_ptr_h%ptr = c_loc(x)
1055 type is (real)
1056 htbl_ptr_h%ptr = c_loc(x)
1057 type is (double precision)
1058 htbl_ptr_h%ptr = c_loc(x)
1059 class default
1060 call neko_error('Unknown Fortran type')
1061 end select
1062
1063 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1064 device_get_ptr_r3 = htbl_ptr_d%ptr
1065 else
1066 call neko_error('Array not associated with device')
1067 end if
1068 end function device_get_ptr_r3
1069
1072 class(*), intent(in), target :: x(:,:,:,:)
1073 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1074 type(c_ptr) :: device_get_ptr_r4
1075
1076 device_get_ptr_r4 = c_null_ptr
1077
1078 select type (x)
1079 type is (integer)
1080 htbl_ptr_h%ptr = c_loc(x)
1081 type is (integer(i8))
1082 htbl_ptr_h%ptr = c_loc(x)
1083 type is (real)
1084 htbl_ptr_h%ptr = c_loc(x)
1085 type is (double precision)
1086 htbl_ptr_h%ptr = c_loc(x)
1087 class default
1088 call neko_error('Unknown Fortran type')
1089 end select
1090
1091 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1092 device_get_ptr_r4 = htbl_ptr_d%ptr
1093 else
1094 call neko_error('Array not associated with device')
1095 end if
1096 end function device_get_ptr_r4
1097
1100#ifdef HAVE_HIP
1101 if (hipdevicesynchronize() .ne. hipsuccess) then
1102 call neko_error('Error during device sync')
1103 end if
1104#elif HAVE_CUDA
1105 if (cudadevicesynchronize() .ne. cudasuccess) then
1106 call neko_error('Error during device sync')
1107 end if
1108#elif HAVE_OPENCL
1109 if (clfinish(glb_cmd_queue) .ne. cl_success) then
1110 call neko_error('Error during device sync')
1111 end if
1112#endif
1113 end subroutine device_sync_device
1114
!> Block until the work queued on `stream` has finished.
!! Uses `hipstreamsynchronize`, `cudastreamsynchronize` or `clfinish`
!! depending on the compiled backend; a failure ends in `neko_error`.
!! @param[in] stream Stream / command queue to wait on.
subroutine device_sync_stream(stream)
  type(c_ptr), intent(in) :: stream
#ifdef HAVE_HIP
  if (hipstreamsynchronize(stream) .ne. hipsuccess) &
       call neko_error('Error during stream sync')
#elif HAVE_CUDA
  if (cudastreamsynchronize(stream) .ne. cudasuccess) &
       call neko_error('Error during stream sync')
#elif HAVE_OPENCL
  if (clfinish(stream) .ne. cl_success) &
       call neko_error('Error during stream sync')
#endif
end subroutine device_sync_stream
1132
!> Create a device stream / command queue.
!! With HIP or CUDA the `flags` argument, when present, selects the
!! "with flags" creation call; the OpenCL branch ignores `flags` and
!! creates a command queue on `glb_ctx` / `glb_device_id`.
!! A backend failure ends in `neko_error`.
!! @param[inout] stream Handle receiving the new stream.
!! @param[in] flags Optional creation flags (HIP and CUDA only).
subroutine device_stream_create(stream, flags)
  type(c_ptr), intent(inout) :: stream
  integer, optional :: flags
  integer :: ierr ! status code, only written by the OpenCL branch
#ifdef HAVE_HIP
  if (.not. present(flags)) then
     if (hipstreamcreate(stream) .ne. hipsuccess) &
          call neko_error('Error during stream create')
  else
     if (hipstreamcreatewithflags(stream, flags) .ne. hipsuccess) &
          call neko_error('Error during stream create (w. flags)')
  end if
#elif HAVE_CUDA
  if (.not. present(flags)) then
     if (cudastreamcreate(stream) .ne. cudasuccess) &
          call neko_error('Error during stream create')
  else
     if (cudastreamcreatewithflags(stream, flags) .ne. cudasuccess) &
          call neko_error('Error during stream create (w. flags)')
  end if
#elif HAVE_OPENCL
  stream = clcreatecommandqueue(glb_ctx, glb_device_id, 0_i8, ierr)
  if (ierr .ne. cl_success) call neko_error('Error during stream create')
#endif
end subroutine device_stream_create
1165
1167 subroutine device_stream_create_with_priority(stream, flags, prio)
1168 type(c_ptr), intent(inout) :: stream
1169 integer, intent(in) :: flags, prio
1170#ifdef HAVE_HIP
1171 if (hipstreamcreatewithpriority(stream, flags, prio) .ne. hipsuccess) then
1172 call neko_error('Error during stream create (w. priority)')
1173 end if
1174#elif HAVE_CUDA
1175 if (cudastreamcreatewithpriority(stream, flags, prio) .ne. cudasuccess) then
1176 call neko_error('Error during stream create (w. priority)')
1177 end if
1178#elif HAVE_OPENCL
1179 call neko_error('Not implemented yet')
1180#endif
1182
!> Destroy a device stream / command queue.
!! Uses `hipstreamdestroy`, `cudastreamdestroy` or `clreleasecommandqueue`
!! depending on the compiled backend; a failure ends in `neko_error`.
!! @param[inout] stream Stream handle to destroy.
subroutine device_stream_destroy(stream)
  type(c_ptr), intent(inout) :: stream
#ifdef HAVE_HIP
  if (hipstreamdestroy(stream) .ne. hipsuccess) &
       call neko_error('Error during stream destroy')
#elif HAVE_CUDA
  if (cudastreamdestroy(stream) .ne. cudasuccess) &
       call neko_error('Error during stream destroy')
#elif HAVE_OPENCL
  if (clreleasecommandqueue(stream) .ne. cl_success) &
       call neko_error('Error during stream destroy')
#endif
end subroutine device_stream_destroy
1200
!> Make `stream` wait for `event`.
!! HIP and CUDA forward to their stream-wait-event call including `flags`.
!! The OpenCL branch ignores `flags`; it enqueues a barrier on `stream`
!! and then a wait for the single event. A failure ends in `neko_error`.
!! @param[in] stream Stream / command queue that has to wait.
!! @param[in] event Event to wait for.
!! @param flags Flags passed to the HIP / CUDA call.
subroutine device_stream_wait_event(stream, event, flags)
  type(c_ptr), intent(in) :: stream
  type(c_ptr), target, intent(in) :: event
  integer :: flags
#ifdef HAVE_HIP
  if (hipstreamwaitevent(stream, event, flags) .ne. hipsuccess) &
       call neko_error('Error during stream sync')
#elif HAVE_CUDA
  if (cudastreamwaitevent(stream, event, flags) .ne. cudasuccess) &
       call neko_error('Error during stream sync')
#elif HAVE_OPENCL
  if (clenqueuebarrier(stream) .ne. cl_success) &
       call neko_error('Error during barrier')
  if (clenqueuewaitforevents(stream, 1, c_loc(event)) .ne. cl_success) &
       call neko_error('Error during stream sync')
#endif
end subroutine device_stream_wait_event
1223
1226#if HAVE_CUDA
1227 if (cudaprofilerstart() .ne. cudasuccess) then
1228 call neko_error('Error starting profiler')
1229 end if
1230#endif
1231 end subroutine device_profiler_start
1232
1235#if HAVE_CUDA
1236 if (cudaprofilerstop() .ne. cudasuccess) then
1237 call neko_error('Error stopping profiler')
1238 end if
1239#endif
1240 end subroutine device_profiler_stop
1241
!> Create a device event.
!! With HIP or CUDA the `flags` argument, when present, selects the
!! "with flags" creation call. The OpenCL branch creates nothing and only
!! sets `event` to `c_null_ptr`. A backend failure ends in `neko_error`.
!! @param[inout] event Handle receiving the new event.
!! @param[in] flags Optional creation flags (HIP and CUDA only).
subroutine device_event_create(event, flags)
  type(c_ptr), intent(inout) :: event
  integer, optional :: flags
  integer :: ierr
#ifdef HAVE_HIP
  if (.not. present(flags)) then
     if (hipeventcreate(event) .ne. hipsuccess) &
          call neko_error('Error during event create')
  else
     if (hipeventcreatewithflags(event, flags) .ne. hipsuccess) &
          call neko_error('Error during event create (w. flags)')
  end if
#elif HAVE_CUDA
  if (.not. present(flags)) then
     if (cudaeventcreate(event) .ne. cudasuccess) &
          call neko_error('Error during event create')
  else
     if (cudaeventcreatewithflags(event, flags) .ne. cudasuccess) &
          call neko_error('Error during event create (w. flags)')
  end if
#elif HAVE_OPENCL
  event = c_null_ptr
#endif
end subroutine device_event_create
1271
!> Destroy a device event.
!! HIP and CUDA call their event-destroy routine and report a failure
!! through `neko_error`; the OpenCL branch only resets `event` to
!! `c_null_ptr`.
!! @param[inout] event Event handle to destroy.
subroutine device_event_destroy(event)
  type(c_ptr), intent(inout) :: event
#ifdef HAVE_HIP
  if (hipeventdestroy(event) .ne. hipsuccess) &
       call neko_error('Error during event destroy')
#elif HAVE_CUDA
  if (cudaeventdestroy(event) .ne. cudasuccess) &
       call neko_error('Error during event destroy')
#elif HAVE_OPENCL
  event = c_null_ptr
#endif
end subroutine device_event_destroy
1287
!> Record `event` on `stream`.
!! HIP and CUDA call their event-record routine; the OpenCL branch
!! enqueues a marker on `stream`, passing the address of `event`.
!! A backend failure ends in `neko_error`.
!! @param[in] event Event to record.
!! @param[in] stream Stream / command queue to record the event on.
subroutine device_event_record(event, stream)
  type(c_ptr), target, intent(in) :: event
  type(c_ptr), intent(in) :: stream
#ifdef HAVE_HIP
  if (hipeventrecord(event, stream) .ne. hipsuccess) &
       call neko_error('Error recording an event')
#elif HAVE_CUDA
  if (cudaeventrecord(event, stream) .ne. cudasuccess) &
       call neko_error('Error recording an event')
#elif HAVE_OPENCL
  if (clenqueuemarker(stream, c_loc(event)) .ne. cl_success) &
       call neko_error('Error recording an event')
#endif
end subroutine device_event_record
1306
!> Block until `event` has completed.
!! HIP and CUDA call their event-synchronize routine. The OpenCL branch
!! waits only when `event` is associated and otherwise does nothing.
!! A backend failure ends in `neko_error`.
!! @param[in] event Event to wait for.
subroutine device_event_sync(event)
  type(c_ptr), target, intent(in) :: event
#ifdef HAVE_HIP
  if (hipeventsynchronize(event) .ne. hipsuccess) &
       call neko_error('Error during event sync')
#elif HAVE_CUDA
  if (cudaeventsynchronize(event) .ne. cudasuccess) &
       call neko_error('Error during event sync')
#elif HAVE_OPENCL
  ! Kept as a nested test so c_loc(event) is only used for a live event
  if (c_associated(event)) then
     if (clwaitforevents(1, c_loc(event)) .ne. cl_success) &
          call neko_error('Error during event sync')
  end if
#endif
end subroutine device_event_sync
1326
1327end module device
double real
Associate a Fortran array to a (allocated) device pointer.
Definition device.F90:78
Check if a Fortran array is associated with a device pointer.
Definition device.F90:84
Deassociate a Fortran array from a device pointer.
Definition device.F90:90
Return the device pointer for an associated Fortran array.
Definition device.F90:96
Map a Fortran array to a device (allocate and associate)
Definition device.F90:72
Copy data between host and device (or device and device)
Definition device.F90:66
Synchronize a device or stream.
Definition device.F90:102
Fortran CUDA interface.
Definition cuda_intf.F90:34
subroutine cuda_device_name(name)
subroutine cuda_finalize(glb_cmd_queue, aux_cmd_queue)
@ cudamemcpydevicetohost
Definition cuda_intf.F90:54
@ cudamemcpydevicetodevice
Definition cuda_intf.F90:55
@ cudamemcpyhosttodevice
Definition cuda_intf.F90:53
integer function cuda_device_count()
Return the number of available CUDA devices.
subroutine cuda_init(glb_cmd_queue, aux_cmd_queue, strm_high_prio, strm_low_prio)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_record(event, stream)
Record a device event.
Definition device.F90:1290
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1309
subroutine device_associate_r2(x, x_d)
Associate a Fortran rank 2 array to a (allocated) device pointer.
Definition device.F90:593
subroutine, public device_finalize
Definition device.F90:145
integer, parameter, public device_to_device
Definition device.F90:47
type(c_ptr) function device_get_ptr_r4(x)
Return the device pointer for an associated Fortran rank 4 array.
Definition device.F90:1072
type(c_ptr) function device_get_ptr_r1(x)
Return the device pointer for an associated Fortran rank 1 array.
Definition device.F90:988
integer, public strm_low_prio
Low priority stream setting.
Definition device.F90:63
integer, parameter, public host_to_device
Definition device.F90:47
subroutine device_map_r3(x, x_d, n)
Map a Fortran rank 3 array to a device (allocate and associate)
Definition device.F90:822
subroutine, private device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
Copy data between host and device.
Definition device.F90:459
logical function device_associated_r3(x)
Check if a Fortran rank 3 array is associated with a device pointer.
Definition device.F90:934
type(htable_cptr_t), private device_addrtbl
Table of host to device address mappings.
Definition device.F90:107
subroutine, public device_profiler_stop()
Stop device profiling.
Definition device.F90:1235
subroutine device_deassociate_r3(x)
Deassociate a Fortran rank 3 array from a device pointer.
Definition device.F90:716
subroutine, public device_sync_stream(stream)
Synchronize a device stream.
Definition device.F90:1117
type(c_ptr) function device_get_ptr_r3(x)
Return the device pointer for an associated Fortran rank 3 array.
Definition device.F90:1044
subroutine, public device_profiler_start()
Start device profiling.
Definition device.F90:1226
subroutine device_map_r2(x, x_d, n)
Map a Fortran rank 2 array to a device (allocate and associate)
Definition device.F90:793
subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 2 arrays)
Definition device.F90:314
subroutine device_map_r4(x, x_d, n)
Map a Fortran rank 4 array to a device (allocate and associate)
Definition device.F90:851
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:214
integer, parameter, public device_to_host
Definition device.F90:47
subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
Copy data between host and device (or device and device) (c-pointers)
Definition device.F90:430
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1274
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:187
subroutine, public device_stream_create_with_priority(stream, flags, prio)
Create a device stream/command queue with priority.
Definition device.F90:1168
subroutine, public device_stream_create(stream, flags)
Create a device stream/command queue.
Definition device.F90:1135
subroutine device_deassociate_r4(x)
Deassociate a Fortran rank 4 array from a device pointer.
Definition device.F90:740
subroutine device_sync_device()
Synchronize the device.
Definition device.F90:1100
subroutine device_associate_r1(x, x_d)
Associate a Fortran rank 1 array to a (allocated) device pointer.
Definition device.F90:568
subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 4 arrays)
Definition device.F90:390
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
Definition device.F90:1203
subroutine device_map_r1(x, x_d, n)
Map a Fortran rank 1 array to a device (allocate and associate)
Definition device.F90:764
subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 1 arrays)
Definition device.F90:276
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Definition device.F90:51
subroutine, public device_event_create(event, flags)
Create a device event.
Definition device.F90:1244
integer function, public device_count()
Return the number of available devices.
Definition device.F90:174
subroutine, public device_name(name)
Definition device.F90:161
logical function device_associated_r4(x)
Check if a Fortran rank 4 array is associated with a device pointer.
Definition device.F90:961
logical function device_associated_r2(x)
Check if a Fortran rank 2 array is associated with a device pointer.
Definition device.F90:907
integer, public strm_high_prio
High priority stream setting.
Definition device.F90:60
type(c_ptr), bind(C), public aux_cmd_queue
Aux command queue.
Definition device.F90:54
type(c_ptr) function device_get_ptr_r2(x)
Return the device pointer for an associated Fortran rank 2 array.
Definition device.F90:1016
subroutine device_associate_r4(x, x_d)
Associate a Fortran rank 4 array to a (allocated) device pointer.
Definition device.F90:643
subroutine device_deassociate_r1(x)
Deassociate a Fortran rank 1 array from a device pointer.
Definition device.F90:668
type(c_ptr), bind(C), public glb_cmd_event
Event for the global command queue.
Definition device.F90:57
subroutine device_deassociate_r2(x)
Deassociate a Fortran rank 2 array from a device pointer.
Definition device.F90:692
subroutine, public device_init
Definition device.F90:123
subroutine device_associate_r3(x, x_d)
Associate a Fortran rank 3 array to a (allocated) device pointer.
Definition device.F90:618
logical function device_associated_r1(x)
Check if a Fortran rank 1 array is associated with a device pointer.
Definition device.F90:880
subroutine, public device_memset(x_d, v, s, sync, strm)
Set memory on the device to a value.
Definition device.F90:233
subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 3 arrays)
Definition device.F90:352
subroutine, public device_stream_destroy(stream)
Destroy a device stream/command queue.
Definition device.F90:1185
Fortran HIP interface.
Definition hip_intf.F90:34
subroutine hip_device_name(name)
Definition hip_intf.F90:271
@ hipmemcpydevicetohost
Definition hip_intf.F90:72
@ hipmemcpydevicetodevice
Definition hip_intf.F90:73
@ hipmemcpyhosttodevice
Definition hip_intf.F90:71
subroutine hip_init(glb_cmd_queue, aux_cmd_queue, strm_high_prio, strm_low_prio)
Definition hip_intf.F90:236
subroutine hip_finalize(glb_cmd_queue, aux_cmd_queue)
Definition hip_intf.F90:258
integer function hip_device_count()
Return the number of available HIP devices.
Definition hip_intf.F90:288
Implements a hash table ADT.
Definition htable.f90:36
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public i8
Definition num_types.f90:7
Fortran OpenCL interface.
subroutine opencl_device_name(name)
integer function opencl_device_count()
Return the number of OpenCL devices.
subroutine opencl_finalize(glb_cmd_queue, aux_cmd_queue)
subroutine opencl_init(glb_cmd_queue, aux_cmd_queue)
OpenCL JIT program library.
Definition prgm_lib.F90:2
subroutine, public opencl_prgm_lib_release
Definition prgm_lib.F90:107
Utilities.
Definition utils.f90:35
C pointer based hash table.
Definition htable.f90:142