Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
device.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2025, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module device
35 use num_types, only : i8
36 use opencl_intf
37 use cuda_intf
38 use hip_intf
40 use htable, only : htable_cptr_t, h_cptr_t
41 use utils, only : neko_error
43 use, intrinsic :: iso_c_binding
44 implicit none
45 private
46
47 integer, public, parameter :: host_to_device = 1, device_to_host = 2, &
49
51 type(c_ptr), public, bind(c) :: glb_cmd_queue = c_null_ptr
52
54 type(c_ptr), public, bind(c) :: aux_cmd_queue = c_null_ptr
55
56#ifdef HAVE_OPENCL
57
58 type(c_ptr), public, bind(c) :: prf_cmd_queue = c_null_ptr
59#endif
60
62 type(c_ptr), public, bind(c) :: glb_cmd_event
63
65 integer, public :: strm_high_prio
66
68 integer, public :: strm_low_prio
69
74 end interface device_memcpy
75
77 interface device_map
78 module procedure device_map_r1, device_map_r2, &
80 end interface device_map
81
86 end interface device_associate
87
92 end interface device_associated
93
98 end interface device_deassociate
99
104 end interface device_get_ptr
105
107 interface device_sync
108 module procedure device_sync_device, device_sync_stream
109 end interface device_sync
110
113
122
123 private :: device_memcpy_common
124
125contains
126
127 subroutine device_init
128#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
129 call device_addrtbl%init(64)
130
131#ifdef HAVE_HIP
133#elif HAVE_CUDA
135#elif HAVE_OPENCL
137#endif
139#endif
140
141 ! Check the device count against the number of MPI ranks
142 if (neko_bcknd_device .eq. 1) then
143 if (device_count() .ne. 1) then
144 call neko_error('Only one device is supported per MPI rank')
145 end if
146 end if
147 end subroutine device_init
148
150#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
151 call device_addrtbl%free()
152
153#ifdef HAVE_HIP
155#elif HAVE_CUDA
157#elif HAVE_OPENCL
160#endif
162#endif
163 end subroutine device_finalize
164
!> Query the name of the device behind the compiled-in backend.
!! @param name Character buffer handed to the backend's name routine. It is
!! left untouched when no device backend is compiled in.
subroutine device_name(name)
  character(len=*), intent(inout) :: name

  ! At most one of the branches below survives preprocessing
#ifdef HAVE_HIP
  call hip_device_name(name)
#elif HAVE_CUDA
  call cuda_device_name(name)
#elif HAVE_OPENCL
  call opencl_device_name(name)
#endif

end subroutine device_name
176
178 integer function device_count()
179#ifdef HAVE_HIP
181#elif HAVE_CUDA
183#elif HAVE_OPENCL
185#else
186 device_count = 0
187#endif
188 end function device_count
189
!> Allocate memory on the device.
!! @param x_d Device pointer. On return it refers to the new allocation, or
!! is `c_null_ptr` when @a s is zero.
!! @param s Size of the requested allocation, as passed to the backend
!! allocator.
!! @note A failed backend allocation is reported through `neko_error`.
subroutine device_alloc(x_d, s)
  type(c_ptr), intent(inout) :: x_d
  integer(c_size_t), intent(in) :: s
#ifdef HAVE_OPENCL
  ! Only the OpenCL allocator reports its status via an output argument
  integer :: ierr
#endif

  ! Zero-sized requests never reach the backend allocator: synchronise the
  ! device and hand back a null pointer instead
  if (s .eq. 0) then
    call device_sync()
    x_d = c_null_ptr
    return
  end if

#ifdef HAVE_HIP
  if (hipmalloc(x_d, s) .ne. hipsuccess) then
    call neko_error('Memory allocation on device failed')
  end if
#elif HAVE_CUDA
  if (cudamalloc(x_d, s) .ne. cudasuccess) then
    call neko_error('Memory allocation on device failed')
  end if
#elif HAVE_OPENCL
  x_d = clcreatebuffer(glb_ctx, cl_mem_read_write, s, c_null_ptr, ierr)
  if (ierr .ne. cl_success) then
    call neko_error('Memory allocation on device failed')
  end if
#endif
end subroutine device_alloc
216
!> Deallocate memory on the device.
!! @param x_d Device pointer to release; it is `c_null_ptr` on return.
!! @note Releasing a null pointer is a no-op. `device_alloc` returns
!! `c_null_ptr` for zero-sized requests, and the OpenCL release call rejects
!! a null memory object, so without this guard a matching alloc/free pair
!! would abort on that backend.
subroutine device_free(x_d)
  type(c_ptr), intent(inout) :: x_d

  ! Nothing was allocated, so there is nothing to hand to the backend
  if (.not. c_associated(x_d)) return

#ifdef HAVE_HIP
  if (hipfree(x_d) .ne. hipsuccess) then
    call neko_error('Memory deallocation on device failed')
  end if
#elif HAVE_CUDA
  if (cudafree(x_d) .ne. cudasuccess) then
    call neko_error('Memory deallocation on device failed')
  end if
#elif HAVE_OPENCL
  if (clreleasememobject(x_d) .ne. cl_success) then
    call neko_error('Memory deallocation on device failed')
  end if
#endif

  ! Make sure a stale handle cannot be reused after the release
  x_d = c_null_ptr
end subroutine device_free
235
!> Set memory on the device to a value.
!! @param x_d Device pointer of the region to fill.
!! @param v Fill value handed to the backend.
!! @param s Size of the region, as passed to the backend.
!! @param sync Optional; when present and true the stream is synchronised
!! after the fill has been enqueued. Defaults to false.
!! @param strm Optional stream / command queue. Defaults to `glb_cmd_queue`.
subroutine device_memset(x_d, v, s, sync, strm)
  type(c_ptr), intent(inout) :: x_d
  integer(c_int), target, value :: v
  integer(c_size_t), intent(in) :: s
  logical, optional :: sync
  type(c_ptr), optional :: strm
  type(c_ptr) :: queue
  logical :: wait_for_completion

  ! Resolve the optional arguments to their defaults
  wait_for_completion = .false.
  if (present(sync)) wait_for_completion = sync

  queue = glb_cmd_queue
  if (present(strm)) queue = strm

  ! NOTE(review): the OpenCL branch fills with a c_sizeof(v)-byte pattern,
  ! whereas the HIP/CUDA memset calls take the same integer argument;
  ! confirm all backends agree for non-zero v.
#ifdef HAVE_HIP
  if (hipmemsetasync(x_d, v, s, queue) /= hipsuccess) &
    call neko_error('Device memset async failed')
#elif HAVE_CUDA
  if (cudamemsetasync(x_d, v, s, queue) /= cudasuccess) &
    call neko_error('Device memset async failed')
#elif HAVE_OPENCL
  if (clenqueuefillbuffer(queue, x_d, c_loc(v), c_sizeof(v), 0_i8, &
                          s, 0, c_null_ptr, c_null_ptr) /= cl_success) &
    call neko_error('Device memset async failed')
#endif

  if (wait_for_completion) call device_sync_stream(queue)

end subroutine device_memset
278
!> Copy data between host and device (rank 1 arrays).
!! @param x Host array (default integer, integer(i8), default real or
!! double precision; any other type is reported via `neko_error`).
!! @param x_d Device pointer.
!! @param n Number of elements to copy.
!! @param dir Direction of the copy.
!! @param sync Whether to synchronise once the copy has been enqueued.
!! @param strm Optional stream / command queue. Defaults to `glb_cmd_queue`.
subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
  integer, intent(in) :: n
  class(*), intent(inout), target :: x(:)
  type(c_ptr), intent(inout) :: x_d
  integer, intent(in), value :: dir
  logical :: sync
  type(c_ptr), optional :: strm
  type(c_ptr) :: host_ptr, queue
  integer(c_size_t) :: nbytes

  ! Fall back to the global queue unless the caller supplied one
  queue = glb_cmd_queue
  if (present(strm)) queue = strm

  ! Host address and transfer size in bytes, per supported element type
  select type (x)
  type is (double precision)
    host_ptr = c_loc(x)
    nbytes = int(n, c_size_t) * 8_c_size_t
  type is (real)
    host_ptr = c_loc(x)
    nbytes = int(n, c_size_t) * 4_c_size_t
  type is (integer(i8))
    host_ptr = c_loc(x)
    nbytes = int(n, c_size_t) * 8_c_size_t
  type is (integer)
    host_ptr = c_loc(x)
    nbytes = int(n, c_size_t) * 4_c_size_t
  class default
    call neko_error('Unknown Fortran type')
  end select

  call device_memcpy_common(host_ptr, x_d, nbytes, dir, sync, queue)

end subroutine device_memcpy_r1
316
!> Copy data between host and device (rank 2 arrays).
!! @param x Host array (default integer, integer(i8), default real or
!! double precision; any other type is reported via `neko_error`).
!! @param x_d Device pointer.
!! @param n Number of elements to copy.
!! @param dir Direction of the copy.
!! @param sync Whether to synchronise once the copy has been enqueued.
!! @param strm Optional stream / command queue. Defaults to `glb_cmd_queue`.
subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
  integer, intent(in) :: n
  class(*), intent(inout), target :: x(:,:)
  type(c_ptr), intent(inout) :: x_d
  integer, intent(in), value :: dir
  logical :: sync
  type(c_ptr), optional :: strm
  type(c_ptr) :: host_ptr, queue
  integer(c_size_t) :: nbytes

  ! Fall back to the global queue unless the caller supplied one
  queue = glb_cmd_queue
  if (present(strm)) queue = strm

  ! Host address and transfer size in bytes, per supported element type
  select type (x)
  type is (double precision)
    host_ptr = c_loc(x)
    nbytes = int(n, c_size_t) * 8_c_size_t
  type is (real)
    host_ptr = c_loc(x)
    nbytes = int(n, c_size_t) * 4_c_size_t
  type is (integer(i8))
    host_ptr = c_loc(x)
    nbytes = int(n, c_size_t) * 8_c_size_t
  type is (integer)
    host_ptr = c_loc(x)
    nbytes = int(n, c_size_t) * 4_c_size_t
  class default
    call neko_error('Unknown Fortran type')
  end select

  call device_memcpy_common(host_ptr, x_d, nbytes, dir, sync, queue)

end subroutine device_memcpy_r2
354
!> Copy data between host and device (rank 3 arrays).
!! @param x Host array (default integer, integer(i8), default real or
!! double precision; any other type is reported via `neko_error`).
!! @param x_d Device pointer.
!! @param n Number of elements to copy.
!! @param dir Direction of the copy.
!! @param sync Whether to synchronise once the copy has been enqueued.
!! @param strm Optional stream / command queue. Defaults to `glb_cmd_queue`.
subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
  integer, intent(in) :: n
  class(*), intent(inout), target :: x(:,:,:)
  type(c_ptr), intent(inout) :: x_d
  integer, intent(in), value :: dir
  logical :: sync
  type(c_ptr), optional :: strm
  type(c_ptr) :: host_ptr, queue
  integer(c_size_t) :: nbytes

  ! Fall back to the global queue unless the caller supplied one
  queue = glb_cmd_queue
  if (present(strm)) queue = strm

  ! Host address and transfer size in bytes, per supported element type
  select type (x)
  type is (double precision)
    host_ptr = c_loc(x)
    nbytes = int(n, c_size_t) * 8_c_size_t
  type is (real)
    host_ptr = c_loc(x)
    nbytes = int(n, c_size_t) * 4_c_size_t
  type is (integer(i8))
    host_ptr = c_loc(x)
    nbytes = int(n, c_size_t) * 8_c_size_t
  type is (integer)
    host_ptr = c_loc(x)
    nbytes = int(n, c_size_t) * 4_c_size_t
  class default
    call neko_error('Unknown Fortran type')
  end select

  call device_memcpy_common(host_ptr, x_d, nbytes, dir, sync, queue)

end subroutine device_memcpy_r3
392
!> Copy data between host and device (rank 4 arrays).
!! @param x Host array (default integer, integer(i8), default real or
!! double precision; any other type is reported via `neko_error`).
!! @param x_d Device pointer.
!! @param n Number of elements to copy.
!! @param dir Direction of the copy.
!! @param sync Whether to synchronise once the copy has been enqueued.
!! @param strm Optional stream / command queue. Defaults to `glb_cmd_queue`.
subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
  integer, intent(in) :: n
  class(*), intent(inout), target :: x(:,:,:,:)
  type(c_ptr), intent(inout) :: x_d
  integer, intent(in), value :: dir
  logical :: sync
  type(c_ptr), optional :: strm
  type(c_ptr) :: host_ptr, queue
  integer(c_size_t) :: nbytes

  ! Fall back to the global queue unless the caller supplied one
  queue = glb_cmd_queue
  if (present(strm)) queue = strm

  ! Host address and transfer size in bytes, per supported element type
  select type (x)
  type is (double precision)
    host_ptr = c_loc(x)
    nbytes = int(n, c_size_t) * 8_c_size_t
  type is (real)
    host_ptr = c_loc(x)
    nbytes = int(n, c_size_t) * 4_c_size_t
  type is (integer(i8))
    host_ptr = c_loc(x)
    nbytes = int(n, c_size_t) * 8_c_size_t
  type is (integer)
    host_ptr = c_loc(x)
    nbytes = int(n, c_size_t) * 4_c_size_t
  class default
    call neko_error('Unknown Fortran type')
  end select

  call device_memcpy_common(host_ptr, x_d, nbytes, dir, sync, queue)

end subroutine device_memcpy_r4
430
!> Copy data between host and device (or device and device) using
!! C pointers.
!! @param dst Forwarded as the first pointer (`ptr_h`) of
!! `device_memcpy_common`.
!! @param src Forwarded as the second pointer (`x_d`) of
!! `device_memcpy_common`.
!! @param s Size of the transfer in bytes.
!! @param dir Direction of the copy.
!! @param sync Optional; synchronise after the copy. Defaults to false.
!! @param strm Optional stream / command queue. Defaults to `glb_cmd_queue`.
!! @note Because the pointers are forwarded positionally, host/device
!! transfers treat @a dst as the host side and @a src as the device side
!! whatever @a dir is; only device-to-device copies go from @a src to
!! @a dst.
subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
  type(c_ptr), intent(inout) :: dst
  type(c_ptr), intent(inout) :: src
  integer(c_size_t), intent(in) :: s
  integer, intent(in), value :: dir
  logical, optional :: sync
  type(c_ptr), optional :: strm
  type(c_ptr) :: queue
  logical :: wait_for_completion

  ! Resolve the optional arguments to their defaults
  wait_for_completion = .false.
  if (present(sync)) wait_for_completion = sync

  queue = glb_cmd_queue
  if (present(strm)) queue = strm

  call device_memcpy_common(dst, src, s, dir, wait_for_completion, queue)

end subroutine device_memcpy_cptr
459
!> Copy data between host and device (or device and device).
!! @param ptr_h Host pointer; for device-to-device copies it is the
!! destination device pointer.
!! @param x_d Device pointer; for device-to-device copies it is the source.
!! @param s Size of the transfer in bytes. A zero-sized transfer enqueues
!! nothing but still honours @a sync_device.
!! @param dir One of `host_to_device`, `device_to_host`, `device_to_device`;
!! anything else is reported via `neko_error`.
!! @param sync_device Synchronise @a stream once the copy is enqueued.
!! @param stream Stream / command queue the copy is enqueued on.
subroutine device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
  type(c_ptr), intent(inout) :: ptr_h
  type(c_ptr), intent(inout) :: x_d
  integer(c_size_t), intent(in) :: s
  integer, intent(in), value :: dir
  logical, intent(in) :: sync_device
  type(c_ptr), intent(inout) :: stream
#ifdef HAVE_OPENCL
  ! Blocking flag handed to the OpenCL read/write calls
  integer(kind(cl_true)) :: blocking
#endif

  ! Nothing to transfer; only the requested synchronisation remains
  if (s .eq. 0) then
    if (sync_device) then
      call device_sync_stream(stream)
    end if
    return
  end if

#ifdef HAVE_HIP
  if (dir .eq. host_to_device) then
    if (hipmemcpyasync(x_d, ptr_h, s, &
                       hipmemcpyhosttodevice, stream) .ne. hipsuccess) then
      call neko_error('Device memcpy async (host-to-device) failed')
    end if
  else if (dir .eq. device_to_host) then
    if (hipmemcpyasync(ptr_h, x_d, s, &
                       hipmemcpydevicetohost, stream) .ne. hipsuccess) then
      call neko_error('Device memcpy async (device-to-host) failed')
    end if
  else if (dir .eq. device_to_device) then
    if (hipmemcpyasync(ptr_h, x_d, s, hipmemcpydevicetodevice, stream) &
        .ne. hipsuccess) then
      call neko_error('Device memcpy async (device-to-device) failed')
    end if
  else
    call neko_error('Device memcpy failed (invalid direction')
  end if
  if (sync_device) then
    call device_sync_stream(stream)
  end if
#elif HAVE_CUDA
  if (dir .eq. host_to_device) then
    if (cudamemcpyasync(x_d, ptr_h, s, cudamemcpyhosttodevice, stream) &
        .ne. cudasuccess) then
      call neko_error('Device memcpy async (host-to-device) failed')
    end if
  else if (dir .eq. device_to_host) then
    if (cudamemcpyasync(ptr_h, x_d, s, cudamemcpydevicetohost, stream) &
        .ne. cudasuccess) then
      call neko_error('Device memcpy async (device-to-host) failed')
    end if
  else if (dir .eq. device_to_device) then
    if (cudamemcpyasync(ptr_h, x_d, s, cudamemcpydevicetodevice, stream) &
        .ne. cudasuccess) then
      call neko_error('Device memcpy async (device-to-device) failed')
    end if
  else
    call neko_error('Device memcpy failed (invalid direction')
  end if
  if (sync_device) then
    call device_sync_stream(stream)
  end if
#elif HAVE_OPENCL
  ! Host transfers are synchronised by asking the runtime for a blocking
  ! read/write instead of a separate wait
  if (sync_device) then
    blocking = cl_true
  else
    blocking = cl_false
  end if

  if (dir .eq. host_to_device) then
    if (clenqueuewritebuffer(stream, x_d, blocking, 0_i8, s, &
                             ptr_h, 0, c_null_ptr, c_null_ptr) &
        .ne. cl_success) then
      call neko_error('Device memcpy (host-to-device) failed')
    end if
  else if (dir .eq. device_to_host) then
    if (clenqueuereadbuffer(stream, x_d, blocking, 0_i8, s, ptr_h, &
                            0, c_null_ptr, c_null_ptr) &
        .ne. cl_success) then
      call neko_error('Device memcpy (device-to-host) failed')
    end if
  else if (dir .eq. device_to_device) then
    if (clenqueuecopybuffer(stream, x_d, ptr_h, 0_i8, 0_i8, s, &
                            0, c_null_ptr, c_null_ptr) &
        .ne. cl_success) then
      call neko_error('Device memcpy (device-to-device) failed')
    end if
    ! The buffer-to-buffer copy has no blocking flag, so a requested
    ! synchronisation must wait on the queue explicitly
    if (sync_device) then
      call device_sync_stream(stream)
    end if
  else
    call neko_error('Device memcpy failed (invalid direction')
  end if
#endif
end subroutine device_memcpy_common
570
!> Associate a Fortran rank 1 array to a (allocated) device pointer.
!! @param x Host array (default integer, integer(i8), default real or
!! double precision; any other type is reported via `neko_error`).
!! @param x_d Device pointer stored in `device_addrtbl` under the host
!! address of @a x.
subroutine device_associate_r1(x, x_d)
  class(*), intent(inout), target :: x(:)
  type(c_ptr), intent(inout) :: x_d
  type(h_cptr_t) :: host_key, dev_entry

  dev_entry%ptr = x_d

  ! The host address of the array is the lookup key
  select type (x)
  type is (double precision)
    host_key%ptr = c_loc(x)
  type is (real)
    host_key%ptr = c_loc(x)
  type is (integer(i8))
    host_key%ptr = c_loc(x)
  type is (integer)
    host_key%ptr = c_loc(x)
  class default
    call neko_error('Unknown Fortran type')
  end select

  call device_addrtbl%set(host_key, dev_entry)

end subroutine device_associate_r1
595
!> Associate a Fortran rank 2 array to a (allocated) device pointer.
!! @param x Host array (default integer, integer(i8), default real or
!! double precision; any other type is reported via `neko_error`).
!! @param x_d Device pointer stored in `device_addrtbl` under the host
!! address of @a x.
subroutine device_associate_r2(x, x_d)
  class(*), intent(inout), target :: x(:,:)
  type(c_ptr), intent(inout) :: x_d
  type(h_cptr_t) :: host_key, dev_entry

  dev_entry%ptr = x_d

  ! The host address of the array is the lookup key
  select type (x)
  type is (double precision)
    host_key%ptr = c_loc(x)
  type is (real)
    host_key%ptr = c_loc(x)
  type is (integer(i8))
    host_key%ptr = c_loc(x)
  type is (integer)
    host_key%ptr = c_loc(x)
  class default
    call neko_error('Unknown Fortran type')
  end select

  call device_addrtbl%set(host_key, dev_entry)

end subroutine device_associate_r2
620
!> Associate a Fortran rank 3 array to a (allocated) device pointer.
!! @param x Host array (default integer, integer(i8), default real or
!! double precision; any other type is reported via `neko_error`).
!! @param x_d Device pointer stored in `device_addrtbl` under the host
!! address of @a x.
subroutine device_associate_r3(x, x_d)
  class(*), intent(inout), target :: x(:,:,:)
  type(c_ptr), intent(inout) :: x_d
  type(h_cptr_t) :: host_key, dev_entry

  dev_entry%ptr = x_d

  ! The host address of the array is the lookup key
  select type (x)
  type is (double precision)
    host_key%ptr = c_loc(x)
  type is (real)
    host_key%ptr = c_loc(x)
  type is (integer(i8))
    host_key%ptr = c_loc(x)
  type is (integer)
    host_key%ptr = c_loc(x)
  class default
    call neko_error('Unknown Fortran type')
  end select

  call device_addrtbl%set(host_key, dev_entry)

end subroutine device_associate_r3
645
!> Associate a Fortran rank 4 array to a (allocated) device pointer.
!! @param x Host array (default integer, integer(i8), default real or
!! double precision; any other type is reported via `neko_error`).
!! @param x_d Device pointer stored in `device_addrtbl` under the host
!! address of @a x.
subroutine device_associate_r4(x, x_d)
  class(*), intent(inout), target :: x(:,:,:,:)
  type(c_ptr), intent(inout) :: x_d
  type(h_cptr_t) :: host_key, dev_entry

  dev_entry%ptr = x_d

  ! The host address of the array is the lookup key
  select type (x)
  type is (double precision)
    host_key%ptr = c_loc(x)
  type is (real)
    host_key%ptr = c_loc(x)
  type is (integer(i8))
    host_key%ptr = c_loc(x)
  type is (integer)
    host_key%ptr = c_loc(x)
  class default
    call neko_error('Unknown Fortran type')
  end select

  call device_addrtbl%set(host_key, dev_entry)

end subroutine device_associate_r4
670
673 class(*), intent(inout), target :: x(:)
674 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
675
676 select type (x)
677 type is (integer)
678 htbl_ptr_h%ptr = c_loc(x)
679 type is (integer(i8))
680 htbl_ptr_h%ptr = c_loc(x)
681 type is (real)
682 htbl_ptr_h%ptr = c_loc(x)
683 type is (double precision)
684 htbl_ptr_h%ptr = c_loc(x)
685 class default
686 call neko_error('Unknown Fortran type')
687 end select
688
689 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
690 call device_addrtbl%remove(htbl_ptr_h)
691 end if
692
693 end subroutine device_deassociate_r1
694
697 class(*), intent(inout), target :: x(:,:)
698 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
699
700 select type (x)
701 type is (integer)
702 htbl_ptr_h%ptr = c_loc(x)
703 type is (integer(i8))
704 htbl_ptr_h%ptr = c_loc(x)
705 type is (real)
706 htbl_ptr_h%ptr = c_loc(x)
707 type is (double precision)
708 htbl_ptr_h%ptr = c_loc(x)
709 class default
710 call neko_error('Unknown Fortran type')
711 end select
712
713 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
714 call device_addrtbl%remove(htbl_ptr_h)
715 end if
716
717 end subroutine device_deassociate_r2
718
721 class(*), intent(inout), target :: x(:,:,:)
722 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
723
724 select type (x)
725 type is (integer)
726 htbl_ptr_h%ptr = c_loc(x)
727 type is (integer(i8))
728 htbl_ptr_h%ptr = c_loc(x)
729 type is (real)
730 htbl_ptr_h%ptr = c_loc(x)
731 type is (double precision)
732 htbl_ptr_h%ptr = c_loc(x)
733 class default
734 call neko_error('Unknown Fortran type')
735 end select
736
737 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
738 call device_addrtbl%remove(htbl_ptr_h)
739 end if
740
741 end subroutine device_deassociate_r3
742
745 class(*), intent(inout), target :: x(:,:,:,:)
746 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
747
748 select type (x)
749 type is (integer)
750 htbl_ptr_h%ptr = c_loc(x)
751 type is (integer(i8))
752 htbl_ptr_h%ptr = c_loc(x)
753 type is (real)
754 htbl_ptr_h%ptr = c_loc(x)
755 type is (double precision)
756 htbl_ptr_h%ptr = c_loc(x)
757 class default
758 call neko_error('Unknown Fortran type')
759 end select
760
761 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
762 call device_addrtbl%remove(htbl_ptr_h)
763 end if
764
765 end subroutine device_deassociate_r4
766
!> Map a Fortran rank 1 array to a device (allocate and associate).
!! @param x Host array (default integer, integer(i8), default real or
!! double precision; any other type is reported via `neko_error`).
!! @param x_d Device pointer; must not be associated on entry.
!! @param n Number of elements to allocate on the device.
subroutine device_map_r1(x, x_d, n)
  integer, intent(in) :: n
  class(*), intent(inout), target :: x(:)
  type(c_ptr), intent(inout) :: x_d
  integer(c_size_t) :: nbytes

  ! Refuse to overwrite a device pointer that is already in use
  if (c_associated(x_d)) call neko_error('Device pointer already associated')

  ! Allocation size in bytes for the supported element types
  select type (x)
  type is (double precision)
    nbytes = int(n, c_size_t) * 8_c_size_t
  type is (real)
    nbytes = int(n, c_size_t) * 4_c_size_t
  type is (integer(i8))
    nbytes = int(n, c_size_t) * 8_c_size_t
  type is (integer)
    nbytes = int(n, c_size_t) * 4_c_size_t
  class default
    call neko_error('Unknown Fortran type')
  end select

  call device_alloc(x_d, nbytes)
  call device_associate(x, x_d)

end subroutine device_map_r1
795
!> Map a Fortran rank 2 array to a device (allocate and associate).
!! @param x Host array (default integer, integer(i8), default real or
!! double precision; any other type is reported via `neko_error`).
!! @param x_d Device pointer; must not be associated on entry.
!! @param n Number of elements to allocate on the device.
subroutine device_map_r2(x, x_d, n)
  integer, intent(in) :: n
  class(*), intent(inout), target :: x(:,:)
  type(c_ptr), intent(inout) :: x_d
  integer(c_size_t) :: nbytes

  ! Refuse to overwrite a device pointer that is already in use
  if (c_associated(x_d)) call neko_error('Device pointer already associated')

  ! Allocation size in bytes for the supported element types
  select type (x)
  type is (double precision)
    nbytes = int(n, c_size_t) * 8_c_size_t
  type is (real)
    nbytes = int(n, c_size_t) * 4_c_size_t
  type is (integer(i8))
    nbytes = int(n, c_size_t) * 8_c_size_t
  type is (integer)
    nbytes = int(n, c_size_t) * 4_c_size_t
  class default
    call neko_error('Unknown Fortran type')
  end select

  call device_alloc(x_d, nbytes)
  call device_associate(x, x_d)

end subroutine device_map_r2
824
!> Map a Fortran rank 3 array to a device (allocate and associate).
!! @param x Host array (default integer, integer(i8), default real or
!! double precision; any other type is reported via `neko_error`).
!! @param x_d Device pointer; must not be associated on entry.
!! @param n Number of elements to allocate on the device.
subroutine device_map_r3(x, x_d, n)
  integer, intent(in) :: n
  class(*), intent(inout), target :: x(:,:,:)
  type(c_ptr), intent(inout) :: x_d
  integer(c_size_t) :: nbytes

  ! Refuse to overwrite a device pointer that is already in use
  if (c_associated(x_d)) call neko_error('Device pointer already associated')

  ! Allocation size in bytes for the supported element types
  select type (x)
  type is (double precision)
    nbytes = int(n, c_size_t) * 8_c_size_t
  type is (real)
    nbytes = int(n, c_size_t) * 4_c_size_t
  type is (integer(i8))
    nbytes = int(n, c_size_t) * 8_c_size_t
  type is (integer)
    nbytes = int(n, c_size_t) * 4_c_size_t
  class default
    call neko_error('Unknown Fortran type')
  end select

  call device_alloc(x_d, nbytes)
  call device_associate(x, x_d)

end subroutine device_map_r3
853
!> Map a Fortran rank 4 array to a device (allocate and associate).
!! @param x Host array (default integer, integer(i8), default real or
!! double precision; any other type is reported via `neko_error`).
!! @param x_d Device pointer; must not be associated on entry.
!! @param n Number of elements to allocate on the device.
subroutine device_map_r4(x, x_d, n)
  integer, intent(in) :: n
  class(*), intent(inout), target :: x(:,:,:,:)
  type(c_ptr), intent(inout) :: x_d
  integer(c_size_t) :: nbytes

  ! Refuse to overwrite a device pointer that is already in use
  if (c_associated(x_d)) call neko_error('Device pointer already associated')

  ! Allocation size in bytes for the supported element types
  select type (x)
  type is (double precision)
    nbytes = int(n, c_size_t) * 8_c_size_t
  type is (real)
    nbytes = int(n, c_size_t) * 4_c_size_t
  type is (integer(i8))
    nbytes = int(n, c_size_t) * 8_c_size_t
  type is (integer)
    nbytes = int(n, c_size_t) * 4_c_size_t
  class default
    call neko_error('Unknown Fortran type')
  end select

  call device_alloc(x_d, nbytes)
  call device_associate(x, x_d)

end subroutine device_map_r4
882
!> Check if a Fortran rank 1 array is associated with a device pointer.
!! @param x Host array (default integer, integer(i8), default real or
!! double precision; any other type is reported via `neko_error`). The
!! array is only inspected for its address, hence `intent(in)`.
!! @return `.true.` when the lookup of the host address of @a x in
!! `device_addrtbl` returns zero, `.false.` otherwise.
function device_associated_r1(x) result(assoc)
  class(*), intent(in), target :: x(:)
  type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
  logical :: assoc

  ! The host address of the array is the lookup key
  select type (x)
  type is (integer)
    htbl_ptr_h%ptr = c_loc(x)
  type is (integer(i8))
    htbl_ptr_h%ptr = c_loc(x)
  type is (real)
    htbl_ptr_h%ptr = c_loc(x)
  type is (double precision)
    htbl_ptr_h%ptr = c_loc(x)
  class default
    call neko_error('Unknown Fortran type')
  end select

  ! A zero return code from the table lookup means the key was found
  assoc = (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0)

end function device_associated_r1
909
!> Check if a Fortran rank 2 array is associated with a device pointer.
!! @param x Host array (default integer, integer(i8), default real or
!! double precision; any other type is reported via `neko_error`). The
!! array is only inspected for its address, hence `intent(in)`.
!! @return `.true.` when the lookup of the host address of @a x in
!! `device_addrtbl` returns zero, `.false.` otherwise.
function device_associated_r2(x) result(assoc)
  class(*), intent(in), target :: x(:,:)
  type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
  logical :: assoc

  ! The host address of the array is the lookup key
  select type (x)
  type is (integer)
    htbl_ptr_h%ptr = c_loc(x)
  type is (integer(i8))
    htbl_ptr_h%ptr = c_loc(x)
  type is (real)
    htbl_ptr_h%ptr = c_loc(x)
  type is (double precision)
    htbl_ptr_h%ptr = c_loc(x)
  class default
    call neko_error('Unknown Fortran type')
  end select

  ! A zero return code from the table lookup means the key was found
  assoc = (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0)

end function device_associated_r2
936
!> Check if a Fortran rank 3 array is associated with a device pointer.
!! @param x Host array (default integer, integer(i8), default real or
!! double precision; any other type is reported via `neko_error`). The
!! array is only inspected for its address, hence `intent(in)`.
!! @return `.true.` when the lookup of the host address of @a x in
!! `device_addrtbl` returns zero, `.false.` otherwise.
function device_associated_r3(x) result(assoc)
  class(*), intent(in), target :: x(:,:,:)
  type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
  logical :: assoc

  ! The host address of the array is the lookup key
  select type (x)
  type is (integer)
    htbl_ptr_h%ptr = c_loc(x)
  type is (integer(i8))
    htbl_ptr_h%ptr = c_loc(x)
  type is (real)
    htbl_ptr_h%ptr = c_loc(x)
  type is (double precision)
    htbl_ptr_h%ptr = c_loc(x)
  class default
    call neko_error('Unknown Fortran type')
  end select

  ! A zero return code from the table lookup means the key was found
  assoc = (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0)

end function device_associated_r3
963
!> Check if a Fortran rank 4 array is associated with a device pointer.
!! @param x Host array (default integer, integer(i8), default real or
!! double precision; any other type is reported via `neko_error`). The
!! array is only inspected for its address, hence `intent(in)`.
!! @return `.true.` when the lookup of the host address of @a x in
!! `device_addrtbl` returns zero, `.false.` otherwise.
function device_associated_r4(x) result(assoc)
  class(*), intent(in), target :: x(:,:,:,:)
  type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
  logical :: assoc

  ! The host address of the array is the lookup key
  select type (x)
  type is (integer)
    htbl_ptr_h%ptr = c_loc(x)
  type is (integer(i8))
    htbl_ptr_h%ptr = c_loc(x)
  type is (real)
    htbl_ptr_h%ptr = c_loc(x)
  type is (double precision)
    htbl_ptr_h%ptr = c_loc(x)
  class default
    call neko_error('Unknown Fortran type')
  end select

  ! A zero return code from the table lookup means the key was found
  assoc = (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0)

end function device_associated_r4
990
993 class(*), intent(in), target :: x(:)
994 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
995 type(c_ptr) :: device_get_ptr_r1
996
997 device_get_ptr_r1 = c_null_ptr
998
999 select type (x)
1000 type is (integer)
1001 htbl_ptr_h%ptr = c_loc(x)
1002 type is (integer(i8))
1003 htbl_ptr_h%ptr = c_loc(x)
1004 type is (real)
1005 htbl_ptr_h%ptr = c_loc(x)
1006 type is (double precision)
1007 htbl_ptr_h%ptr = c_loc(x)
1008 class default
1009 call neko_error('Unknown Fortran type')
1010 end select
1011
1012 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1013 device_get_ptr_r1 = htbl_ptr_d%ptr
1014 else
1015 call neko_error('Array not associated with device')
1016 end if
1017 end function device_get_ptr_r1
1018
1021 class(*), intent(in), target :: x(:,:)
1022 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1023 type(c_ptr) :: device_get_ptr_r2
1024
1025 device_get_ptr_r2 = c_null_ptr
1026
1027 select type (x)
1028 type is (integer)
1029 htbl_ptr_h%ptr = c_loc(x)
1030 type is (integer(i8))
1031 htbl_ptr_h%ptr = c_loc(x)
1032 type is (real)
1033 htbl_ptr_h%ptr = c_loc(x)
1034 type is (double precision)
1035 htbl_ptr_h%ptr = c_loc(x)
1036 class default
1037 call neko_error('Unknown Fortran type')
1038 end select
1039
1040 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1041 device_get_ptr_r2 = htbl_ptr_d%ptr
1042 else
1043 call neko_error('Array not associated with device')
1044 end if
1045 end function device_get_ptr_r2
1046
1049 class(*), intent(in), target :: x(:,:,:)
1050 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1051 type(c_ptr) :: device_get_ptr_r3
1052
1053 device_get_ptr_r3 = c_null_ptr
1054
1055 select type (x)
1056 type is (integer)
1057 htbl_ptr_h%ptr = c_loc(x)
1058 type is (integer(i8))
1059 htbl_ptr_h%ptr = c_loc(x)
1060 type is (real)
1061 htbl_ptr_h%ptr = c_loc(x)
1062 type is (double precision)
1063 htbl_ptr_h%ptr = c_loc(x)
1064 class default
1065 call neko_error('Unknown Fortran type')
1066 end select
1067
1068 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1069 device_get_ptr_r3 = htbl_ptr_d%ptr
1070 else
1071 call neko_error('Array not associated with device')
1072 end if
1073 end function device_get_ptr_r3
1074
1077 class(*), intent(in), target :: x(:,:,:,:)
1078 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1079 type(c_ptr) :: device_get_ptr_r4
1080
1081 device_get_ptr_r4 = c_null_ptr
1082
1083 select type (x)
1084 type is (integer)
1085 htbl_ptr_h%ptr = c_loc(x)
1086 type is (integer(i8))
1087 htbl_ptr_h%ptr = c_loc(x)
1088 type is (real)
1089 htbl_ptr_h%ptr = c_loc(x)
1090 type is (double precision)
1091 htbl_ptr_h%ptr = c_loc(x)
1092 class default
1093 call neko_error('Unknown Fortran type')
1094 end select
1095
1096 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1097 device_get_ptr_r4 = htbl_ptr_d%ptr
1098 else
1099 call neko_error('Array not associated with device')
1100 end if
1101 end function device_get_ptr_r4
1102
1105#ifdef HAVE_HIP
1106 if (hipdevicesynchronize() .ne. hipsuccess) then
1107 call neko_error('Error during device sync')
1108 end if
1109#elif HAVE_CUDA
1110 if (cudadevicesynchronize() .ne. cudasuccess) then
1111 call neko_error('Error during device sync')
1112 end if
1113#elif HAVE_OPENCL
1114 if (clfinish(glb_cmd_queue) .ne. cl_success) then
1115 call neko_error('Error during device sync')
1116 end if
1117#endif
1118 end subroutine device_sync_device
1119
!> Synchronize a device stream.
!! @param stream Stream / command queue to wait on. A backend failure is
!! reported via `neko_error`; without a device backend this is a no-op.
subroutine device_sync_stream(stream)
  type(c_ptr), intent(in) :: stream

#ifdef HAVE_HIP
  if (hipstreamsynchronize(stream) /= hipsuccess) &
    call neko_error('Error during stream sync')
#elif HAVE_CUDA
  if (cudastreamsynchronize(stream) /= cudasuccess) &
    call neko_error('Error during stream sync')
#elif HAVE_OPENCL
  if (clfinish(stream) /= cl_success) &
    call neko_error('Error during stream sync')
#endif

end subroutine device_sync_stream
1137
!> Create a device stream / command queue.
!! @param stream Handle of the newly created stream on return.
!! @param flags Optional creation flags, used by the HIP and CUDA branches
!! only; the OpenCL branch does not read them.
subroutine device_stream_create(stream, flags)
  type(c_ptr), intent(inout) :: stream
  integer, optional :: flags
  integer :: ierr

#ifdef HAVE_HIP
  if (.not. present(flags)) then
    if (hipstreamcreate(stream) /= hipsuccess) &
      call neko_error('Error during stream create')
  else
    if (hipstreamcreatewithflags(stream, flags) /= hipsuccess) &
      call neko_error('Error during stream create (w. flags)')
  end if
#elif HAVE_CUDA
  if (.not. present(flags)) then
    if (cudastreamcreate(stream) /= cudasuccess) &
      call neko_error('Error during stream create')
  else
    if (cudastreamcreatewithflags(stream, flags) /= cudasuccess) &
      call neko_error('Error during stream create (w. flags)')
  end if
#elif HAVE_OPENCL
  ! The queue is created on the global context and device
  stream = clcreatecommandqueue(glb_ctx, glb_device_id, 0_i8, ierr)
  if (ierr /= cl_success) call neko_error('Error during stream create')
#endif

end subroutine device_stream_create
1170
1172 subroutine device_stream_create_with_priority(stream, flags, prio)
1173 type(c_ptr), intent(inout) :: stream
1174 integer, intent(in) :: flags, prio
1175#ifdef HAVE_HIP
1176 if (hipstreamcreatewithpriority(stream, flags, prio) .ne. hipsuccess) then
1177 call neko_error('Error during stream create (w. priority)')
1178 end if
1179#elif HAVE_CUDA
1180 if (cudastreamcreatewithpriority(stream, flags, prio) .ne. cudasuccess) then
1181 call neko_error('Error during stream create (w. priority)')
1182 end if
1183#elif HAVE_OPENCL
1184 call neko_error('Not implemented yet')
1185#endif
1187
!> Destroy a device stream / command queue.
!! @param stream Handle of the stream to release. A backend failure is
!! reported via `neko_error`.
subroutine device_stream_destroy(stream)
  type(c_ptr), intent(inout) :: stream

#ifdef HAVE_HIP
  if (hipstreamdestroy(stream) /= hipsuccess) &
    call neko_error('Error during stream destroy')
#elif HAVE_CUDA
  if (cudastreamdestroy(stream) /= cudasuccess) &
    call neko_error('Error during stream destroy')
#elif HAVE_OPENCL
  if (clreleasecommandqueue(stream) /= cl_success) &
    call neko_error('Error during stream destroy')
#endif

end subroutine device_stream_destroy
1205
!> Make a device stream wait on an event.
!! @param stream Stream / command queue that has to wait.
!! @param event Event to wait for.
!! @param flags Flags forwarded to the HIP and CUDA calls; not read by the
!! OpenCL branch.
subroutine device_stream_wait_event(stream, event, flags)
  type(c_ptr), intent(in) :: stream
  type(c_ptr), target, intent(in) :: event
  integer :: flags

#ifdef HAVE_HIP
  if (hipstreamwaitevent(stream, event, flags) /= hipsuccess) &
    call neko_error('Error during stream sync')
#elif HAVE_CUDA
  if (cudastreamwaitevent(stream, event, flags) /= cudasuccess) &
    call neko_error('Error during stream sync')
#elif HAVE_OPENCL
  ! A barrier is enqueued first, followed by the wait on the event
  if (clenqueuebarrier(stream) /= cl_success) &
    call neko_error('Error during barrier')
  if (clenqueuewaitforevents(stream, 1, c_loc(event)) /= cl_success) &
    call neko_error('Error during stream sync')
#endif

end subroutine device_stream_wait_event
1228
1231#if HAVE_CUDA
1232 if (cudaprofilerstart() .ne. cudasuccess) then
1233 call neko_error('Error starting profiler')
1234 end if
1235#endif
1236 end subroutine device_profiler_start
1237
1240#if HAVE_CUDA
1241 if (cudaprofilerstop() .ne. cudasuccess) then
1242 call neko_error('Error stopping profiler')
1243 end if
1244#endif
1245 end subroutine device_profiler_stop
1246
!> Create a device event.
!! @param event Handle of the new event on return; the OpenCL branch only
!! resets it to `c_null_ptr`.
!! @param flags Optional creation flags, used by the HIP and CUDA branches.
subroutine device_event_create(event, flags)
  type(c_ptr), intent(inout) :: event
  integer, optional :: flags
  integer :: ierr

#ifdef HAVE_HIP
  if (.not. present(flags)) then
    if (hipeventcreate(event) /= hipsuccess) &
      call neko_error('Error during event create')
  else
    if (hipeventcreatewithflags(event, flags) /= hipsuccess) &
      call neko_error('Error during event create (w. flags)')
  end if
#elif HAVE_CUDA
  if (.not. present(flags)) then
    if (cudaeventcreate(event) /= cudasuccess) &
      call neko_error('Error during event create')
  else
    if (cudaeventcreatewithflags(event, flags) /= cudasuccess) &
      call neko_error('Error during event create (w. flags)')
  end if
#elif HAVE_OPENCL
  event = c_null_ptr
#endif

end subroutine device_event_create
1276
!> Destroy a device event.
!! @param event Handle of the event to release; the OpenCL branch only
!! resets it to `c_null_ptr`.
subroutine device_event_destroy(event)
  type(c_ptr), intent(inout) :: event

#ifdef HAVE_HIP
  if (hipeventdestroy(event) /= hipsuccess) &
    call neko_error('Error during event destroy')
#elif HAVE_CUDA
  if (cudaeventdestroy(event) /= cudasuccess) &
    call neko_error('Error during event destroy')
#elif HAVE_OPENCL
  event = c_null_ptr
#endif

end subroutine device_event_destroy
1292
!> Record a device event.
!! @param event Event to record.
!! @param stream Stream / command queue the event is recorded on.
subroutine device_event_record(event, stream)
  type(c_ptr), target, intent(in) :: event
  type(c_ptr), intent(in) :: stream

#ifdef HAVE_HIP
  if (hipeventrecord(event, stream) /= hipsuccess) &
    call neko_error('Error recording an event')
#elif HAVE_CUDA
  if (cudaeventrecord(event, stream) /= cudasuccess) &
    call neko_error('Error recording an event')
#elif HAVE_OPENCL
  ! The address of the event handle is passed to the marker call
  if (clenqueuemarker(stream, c_loc(event)) /= cl_success) &
    call neko_error('Error recording an event')
#endif

end subroutine device_event_record
1311
!> Synchronize an event.
!! @param event Event to wait for. On OpenCL a null event is skipped.
subroutine device_event_sync(event)
  type(c_ptr), target, intent(in) :: event

#ifdef HAVE_HIP
  if (hipeventsynchronize(event) /= hipsuccess) &
    call neko_error('Error during event sync')
#elif HAVE_CUDA
  if (cudaeventsynchronize(event) /= cudasuccess) &
    call neko_error('Error during event sync')
#elif HAVE_OPENCL
  ! Only wait when there is an actual event to wait on
  if (c_associated(event)) then
    if (clwaitforevents(1, c_loc(event)) /= cl_success) &
      call neko_error('Error during event sync')
  end if
#endif

end subroutine device_event_sync
1331
1332end module device
double real
Associate a Fortran array to a (allocated) device pointer.
Definition device.F90:83
Check if a Fortran array is associated with a device pointer.
Definition device.F90:89
Deassociate a Fortran array from a device pointer.
Definition device.F90:95
Return the device pointer for an associated Fortran array.
Definition device.F90:101
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Synchronize a device or stream.
Definition device.F90:107
Fortran CUDA interface.
Definition cuda_intf.F90:34
subroutine cuda_device_name(name)
subroutine cuda_finalize(glb_cmd_queue, aux_cmd_queue)
@ cudamemcpydevicetohost
Definition cuda_intf.F90:54
@ cudamemcpydevicetodevice
Definition cuda_intf.F90:55
@ cudamemcpyhosttodevice
Definition cuda_intf.F90:53
integer function cuda_device_count()
Return the number of available CUDA devices.
subroutine cuda_init(glb_cmd_queue, aux_cmd_queue, strm_high_prio, strm_low_prio)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_record(event, stream)
Record a device event.
Definition device.F90:1295
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1314
subroutine device_associate_r2(x, x_d)
Associate a Fortran rank 2 array to a (allocated) device pointer.
Definition device.F90:598
subroutine, public device_finalize
Definition device.F90:150
integer, parameter, public device_to_device
Definition device.F90:47
type(c_ptr) function device_get_ptr_r4(x)
Return the device pointer for an associated Fortran rank 4 array.
Definition device.F90:1077
type(c_ptr) function device_get_ptr_r1(x)
Return the device pointer for an associated Fortran rank 1 array.
Definition device.F90:993
integer, public strm_low_prio
Low priority stream setting.
Definition device.F90:68
integer, parameter, public host_to_device
Definition device.F90:47
subroutine device_map_r3(x, x_d, n)
Map a Fortran rank 3 array to a device (allocate and associate)
Definition device.F90:827
type(c_ptr), bind(C), public prf_cmd_queue
Profiling command queue.
Definition device.F90:58
subroutine, private device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
Copy data between host and device.
Definition device.F90:464
logical function device_associated_r3(x)
Check if a Fortran rank 3 array is associated with a device pointer.
Definition device.F90:939
type(htable_cptr_t), private device_addrtbl
Table of host to device address mappings.
Definition device.F90:112
subroutine, public device_profiler_stop()
Stop device profiling.
Definition device.F90:1240
subroutine device_deassociate_r3(x)
Deassociate a Fortran rank 3 array from a device pointer.
Definition device.F90:721
subroutine, public device_sync_stream(stream)
Synchronize a device stream.
Definition device.F90:1122
type(c_ptr) function device_get_ptr_r3(x)
Return the device pointer for an associated Fortran rank 3 array.
Definition device.F90:1049
subroutine, public device_profiler_start()
Start device profiling.
Definition device.F90:1231
subroutine device_map_r2(x, x_d, n)
Map a Fortran rank 2 array to a device (allocate and associate)
Definition device.F90:798
subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 2 arrays)
Definition device.F90:319
subroutine device_map_r4(x, x_d, n)
Map a Fortran rank 4 array to a device (allocate and associate)
Definition device.F90:856
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
integer, parameter, public device_to_host
Definition device.F90:47
subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
Copy data between host and device (or device and device) (c-pointers)
Definition device.F90:435
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1279
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:192
subroutine, public device_stream_create_with_priority(stream, flags, prio)
Create a device stream/command queue with priority.
Definition device.F90:1173
subroutine, public device_stream_create(stream, flags)
Create a device stream/command queue.
Definition device.F90:1140
subroutine device_deassociate_r4(x)
Deassociate a Fortran rank 4 array from a device pointer.
Definition device.F90:745
subroutine device_sync_device()
Synchronize the device.
Definition device.F90:1105
subroutine device_associate_r1(x, x_d)
Associate a Fortran rank 1 array to a (allocated) device pointer.
Definition device.F90:573
subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 4 arrays)
Definition device.F90:395
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
Definition device.F90:1208
subroutine device_map_r1(x, x_d, n)
Map a Fortran rank 1 array to a device (allocate and associate)
Definition device.F90:769
subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 1 arrays)
Definition device.F90:281
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Definition device.F90:51
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1249
integer function, public device_count()
Return the number of available devices.
Definition device.F90:179
subroutine, public device_name(name)
Definition device.F90:166
logical function device_associated_r4(x)
Check if a Fortran rank 4 array is associated with a device pointer.
Definition device.F90:966
logical function device_associated_r2(x)
Check if a Fortran rank 2 array is associated with a device pointer.
Definition device.F90:912
integer, public strm_high_prio
High priority stream setting.
Definition device.F90:65
type(c_ptr), bind(C), public aux_cmd_queue
Aux command queue.
Definition device.F90:54
type(c_ptr) function device_get_ptr_r2(x)
Return the device pointer for an associated Fortran rank 2 array.
Definition device.F90:1021
subroutine device_associate_r4(x, x_d)
Associate a Fortran rank 4 array to a (allocated) device pointer.
Definition device.F90:648
subroutine device_deassociate_r1(x)
Deassociate a Fortran rank 1 array from a device pointer.
Definition device.F90:673
type(c_ptr), bind(C), public glb_cmd_event
Event for the global command queue.
Definition device.F90:62
subroutine device_deassociate_r2(x)
Deassociate a Fortran rank 2 array from a device pointer.
Definition device.F90:697
subroutine, public device_init
Definition device.F90:128
subroutine device_associate_r3(x, x_d)
Associate a Fortran rank 3 array to a (allocated) device pointer.
Definition device.F90:623
logical function device_associated_r1(x)
Check if a Fortran rank 1 array is associated with a device pointer.
Definition device.F90:885
subroutine, public device_memset(x_d, v, s, sync, strm)
Set memory on the device to a value.
Definition device.F90:238
subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 3 arrays)
Definition device.F90:357
subroutine, public device_stream_destroy(stream)
Destroy a device stream/command queue.
Definition device.F90:1190
Fortran HIP interface.
Definition hip_intf.F90:34
subroutine hip_device_name(name)
Definition hip_intf.F90:271
@ hipmemcpydevicetohost
Definition hip_intf.F90:72
@ hipmemcpydevicetodevice
Definition hip_intf.F90:73
@ hipmemcpyhosttodevice
Definition hip_intf.F90:71
subroutine hip_init(glb_cmd_queue, aux_cmd_queue, strm_high_prio, strm_low_prio)
Definition hip_intf.F90:236
subroutine hip_finalize(glb_cmd_queue, aux_cmd_queue)
Definition hip_intf.F90:258
integer function hip_device_count()
Return the number of available HIP devices.
Definition hip_intf.F90:288
Implements a hash table ADT.
Definition htable.f90:36
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public i8
Definition num_types.f90:7
Fortran OpenCL interface.
subroutine opencl_device_name(name)
subroutine opencl_finalize(glb_cmd_queue, aux_cmd_queue, prf_cmd_queue)
integer function opencl_device_count()
Return the number of OpenCL devices.
subroutine opencl_init(glb_cmd_queue, aux_cmd_queue, prf_cmd_queue)
OpenCL JIT program library.
Definition prgm_lib.F90:2
subroutine, public opencl_prgm_lib_release
Definition prgm_lib.F90:122
Utilities.
Definition utils.f90:35
C pointer based hash table.
Definition htable.f90:142