Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
device.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2026, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module device
35 use num_types, only : i8
36 use opencl_intf
37 use cuda_intf
38 use hip_intf
39 use metal_intf
41 use htable, only : htable_cptr_t, h_cptr_t
42 use utils, only : neko_error
44 use, intrinsic :: iso_c_binding
45 implicit none
46 private
47
48 integer, public, parameter :: host_to_device = 1, device_to_host = 2, &
50
52 type(c_ptr), public, bind(c) :: glb_cmd_queue = c_null_ptr
53
55 type(c_ptr), public, bind(c) :: aux_cmd_queue = c_null_ptr
56
57#ifdef HAVE_OPENCL
58
59 type(c_ptr), public, bind(c) :: prf_cmd_queue = c_null_ptr
60#endif
61
63 type(c_ptr), public, bind(c) :: glb_cmd_event
64
66 integer, public :: strm_high_prio
67
69 integer, public :: strm_low_prio
70
75 end interface device_memcpy
76
78 interface device_map
79 module procedure device_map_r1, device_map_r2, &
81 end interface device_map
82
87 end interface device_unmap
88
93 end interface device_associate
94
99 end interface device_associated
100
105 end interface device_deassociate
106
111 end interface device_get_ptr
112
114 interface device_sync
115 module procedure device_sync_device, device_sync_stream
116 end interface device_sync
117
120
129
130 private :: device_memcpy_common
131
132contains
133
134 subroutine device_init
135#if defined(HAVE_HIP) || defined(HAVE_CUDA) || \
136 defined(have_opencl) || defined(have_metal)
137 call device_addrtbl%init(64)
138
139#ifdef HAVE_HIP
141#elif HAVE_CUDA
143#elif HAVE_OPENCL
145#elif HAVE_METAL
146 call metal_init(glb_cmd_queue, aux_cmd_queue)
147#endif
149#endif
150
151 ! Check the device count against the number of MPI ranks
152 if (neko_bcknd_device .eq. 1) then
153 if (device_count() .ne. 1) then
154 call neko_error('Only one device is supported per MPI rank')
155 end if
156 end if
157 end subroutine device_init
158
160#if defined(HAVE_HIP) || defined(HAVE_CUDA) || \
161 defined(have_opencl) || defined(have_metal)
162 call device_addrtbl%free()
164
165#ifdef HAVE_HIP
167#elif HAVE_CUDA
169#elif HAVE_OPENCL
172#elif HAVE_METAL
173 call metal_finalize(glb_cmd_queue, aux_cmd_queue)
174#endif
175#endif
176 end subroutine device_finalize
177
178 subroutine device_name(name)
179 character(len=*), intent(inout) :: name
180
181#ifdef HAVE_HIP
182 call hip_device_name(name)
183#elif HAVE_CUDA
184 call cuda_device_name(name)
185#elif HAVE_OPENCL
186 call opencl_device_name(name)
187#elif HAVE_METAL
188 call metal_device_name(name)
189#endif
190 end subroutine device_name
191
193 integer function device_count()
194#ifdef HAVE_HIP
196#elif HAVE_CUDA
198#elif HAVE_OPENCL
200#elif HAVE_METAL
201 device_count = metal_device_count()
202#else
203 device_count = 0
204#endif
205 end function device_count
206
208 subroutine device_alloc(x_d, s)
209 type(c_ptr), intent(inout) :: x_d
210 integer(c_size_t) :: s
211 integer :: ierr
212
213 if (s .eq. 0) then
214 call device_sync()
215 x_d = c_null_ptr
216 return
217 end if
218#ifdef HAVE_HIP
219 if (hipmalloc(x_d, s) .ne. hipsuccess) then
220 call neko_error('Memory allocation on device failed')
221 end if
222#elif HAVE_CUDA
223 if (cudamalloc(x_d, s) .ne. cudasuccess) then
224 call neko_error('Memory allocation on device failed')
225 end if
226#elif HAVE_OPENCL
227 x_d = clcreatebuffer(glb_ctx, cl_mem_read_write, s, c_null_ptr, ierr)
228 if (ierr .ne. cl_success) then
229 call neko_error('Memory allocation on device failed')
230 end if
231#elif HAVE_METAL
232 if (metalalloc(x_d, s) .ne. metalsuccess) then
233 call neko_error('Memory allocation on device failed')
234 end if
235#endif
236 end subroutine device_alloc
237
239 subroutine device_free(x_d)
240 type(c_ptr), intent(inout) :: x_d
241#ifdef HAVE_HIP
242 if (hipfree(x_d) .ne. hipsuccess) then
243 call neko_error('Memory deallocation on device failed')
244 end if
245#elif HAVE_CUDA
246 if (cudafree(x_d) .ne. cudasuccess) then
247 call neko_error('Memory deallocation on device failed')
248 end if
249#elif HAVE_OPENCL
250 if (clreleasememobject(x_d) .ne. cl_success) then
251 call neko_error('Memory deallocation on device failed')
252 end if
253#elif HAVE_METAL
254 if (metalfree(x_d) .ne. metalsuccess) then
255 call neko_error('Memory deallocation on device failed')
256 end if
257#endif
258 x_d = c_null_ptr
259 end subroutine device_free
260
262 subroutine device_memset(x_d, v, s, sync, strm)
263 type(c_ptr), intent(inout) :: x_d
264 integer(c_int), target, value :: v
265 integer(c_size_t), intent(in) :: s
266 logical, optional :: sync
267 type(c_ptr), optional :: strm
268 type(c_ptr) :: stream
269 logical :: sync_device
270
271 if (present(sync)) then
272 sync_device = sync
273 else
274 sync_device = .false.
275 end if
276
277 if (present(strm)) then
278 stream = strm
279 else
280 stream = glb_cmd_queue
281 end if
282
283#ifdef HAVE_HIP
284 if (hipmemsetasync(x_d, v, s, stream) .ne. hipsuccess) then
285 call neko_error('Device memset async failed')
286 end if
287#elif HAVE_CUDA
288 if (cudamemsetasync(x_d, v, s, stream) .ne. cudasuccess) then
289 call neko_error('Device memset async failed')
290 end if
291#elif HAVE_OPENCL
292 if (clenqueuefillbuffer(stream, x_d, c_loc(v), c_sizeof(v), 0_i8, &
293 s, 0, c_null_ptr, c_null_ptr) .ne. cl_success) then
294 call neko_error('Device memset async failed')
295 end if
296#elif HAVE_METAL
297 if (metalmemset(x_d, v, s) .ne. metalsuccess) then
298 call neko_error('Device memset failed')
299 end if
300#endif
301
302 if (sync_device) then
303 call device_sync_stream(stream)
304 end if
305
306 end subroutine device_memset
307
309 subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
310 integer, intent(in) :: n
311 class(*), intent(inout), target :: x(:)
312 type(c_ptr), intent(inout) :: x_d
313 integer, intent(in), value :: dir
314 logical :: sync
315 type(c_ptr), optional :: strm
316 type(c_ptr) :: ptr_h, copy_stream
317 integer(c_size_t) :: s
318
319 if (present(strm)) then
320 copy_stream = strm
321 else
322 copy_stream = glb_cmd_queue
323 end if
324
325 select type (x)
326 type is (integer)
327 s = n * int(4, c_size_t)
328 ptr_h = c_loc(x)
329 type is (integer(i8))
330 s = n * int(8, c_size_t)
331 ptr_h = c_loc(x)
332 type is (real)
333 s = n * int(4, c_size_t)
334 ptr_h = c_loc(x)
335 type is (double precision)
336 s = n * int(8, c_size_t)
337 ptr_h = c_loc(x)
338 class default
339 call neko_error('Unknown Fortran type')
340 end select
341
342 call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
343
344 end subroutine device_memcpy_r1
345
347 subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
348 integer, intent(in) :: n
349 class(*), intent(inout), target :: x(:,:)
350 type(c_ptr), intent(inout) :: x_d
351 integer, intent(in), value :: dir
352 logical :: sync
353 type(c_ptr), optional :: strm
354 type(c_ptr) :: ptr_h, copy_stream
355 integer(c_size_t) :: s
356
357 if (present(strm)) then
358 copy_stream = strm
359 else
360 copy_stream = glb_cmd_queue
361 end if
362
363 select type (x)
364 type is (integer)
365 s = n * int(4, c_size_t)
366 ptr_h = c_loc(x)
367 type is (integer(i8))
368 s = n * int(8, c_size_t)
369 ptr_h = c_loc(x)
370 type is (real)
371 s = n * int(4, c_size_t)
372 ptr_h = c_loc(x)
373 type is (double precision)
374 s = n * int(8, c_size_t)
375 ptr_h = c_loc(x)
376 class default
377 call neko_error('Unknown Fortran type')
378 end select
379
380 call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
381
382 end subroutine device_memcpy_r2
383
385 subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
386 integer, intent(in) :: n
387 class(*), intent(inout), target :: x(:,:,:)
388 type(c_ptr), intent(inout) :: x_d
389 integer, intent(in), value :: dir
390 logical :: sync
391 type(c_ptr), optional :: strm
392 type(c_ptr) :: ptr_h, copy_stream
393 integer(c_size_t) :: s
394
395 if (present(strm)) then
396 copy_stream = strm
397 else
398 copy_stream = glb_cmd_queue
399 end if
400
401 select type (x)
402 type is (integer)
403 s = n * int(4, c_size_t)
404 ptr_h = c_loc(x)
405 type is (integer(i8))
406 s = n * int(8, c_size_t)
407 ptr_h = c_loc(x)
408 type is (real)
409 s = n * int(4, c_size_t)
410 ptr_h = c_loc(x)
411 type is (double precision)
412 s = n * int(8, c_size_t)
413 ptr_h = c_loc(x)
414 class default
415 call neko_error('Unknown Fortran type')
416 end select
417
418 call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
419
420 end subroutine device_memcpy_r3
421
423 subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
424 integer, intent(in) :: n
425 class(*), intent(inout), target :: x(:,:,:,:)
426 type(c_ptr), intent(inout) :: x_d
427 integer, intent(in), value :: dir
428 logical :: sync
429 type(c_ptr), optional :: strm
430 type(c_ptr) :: ptr_h, copy_stream
431 integer(c_size_t) :: s
432
433 if (present(strm)) then
434 copy_stream = strm
435 else
436 copy_stream = glb_cmd_queue
437 end if
438
439 select type (x)
440 type is (integer)
441 s = n * int(4, c_size_t)
442 ptr_h = c_loc(x)
443 type is (integer(i8))
444 s = n * int(8, c_size_t)
445 ptr_h = c_loc(x)
446 type is (real)
447 s = n * int(4, c_size_t)
448 ptr_h = c_loc(x)
449 type is (double precision)
450 s = n * int(8, c_size_t)
451 ptr_h = c_loc(x)
452 class default
453 call neko_error('Unknown Fortran type')
454 end select
455
456 call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
457
458 end subroutine device_memcpy_r4
459
463 subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
464 type(c_ptr), intent(inout) :: dst
465 type(c_ptr), intent(inout) :: src
466 integer(c_size_t), intent(in) :: s
467 integer, intent(in), value :: dir
468 logical, optional :: sync
469 type(c_ptr), optional :: strm
470 type(c_ptr) :: copy_stream
471 logical :: sync_device
472
473 if (present(sync)) then
474 sync_device = sync
475 else
476 sync_device = .false.
477 end if
478
479 if (present(strm)) then
480 copy_stream = strm
481 else
482 copy_stream = glb_cmd_queue
483 end if
484
485 call device_memcpy_common(dst, src, s, dir, sync_device, copy_stream)
486
487 end subroutine device_memcpy_cptr
488
492 subroutine device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
493 type(c_ptr), intent(inout) :: ptr_h
494 type(c_ptr), intent(inout) :: x_d
495 integer(c_size_t), intent(in) :: s
496 integer, intent(in), value :: dir
497 logical, intent(in) :: sync_device
498 type(c_ptr), intent(inout) :: stream
499
500 if (s .eq. 0) then
501 if (sync_device) then
502 call device_sync_stream(stream)
503 end if
504 return
505 end if
506
507#ifdef HAVE_HIP
508 if (dir .eq. host_to_device) then
509 if (hipmemcpyasync(x_d, ptr_h, s, &
510 hipmemcpyhosttodevice, stream) .ne. hipsuccess) then
511 call neko_error('Device memcpy async (host-to-device) failed')
512 end if
513 else if (dir .eq. device_to_host) then
514 if (hipmemcpyasync(ptr_h, x_d, s, &
515 hipmemcpydevicetohost, stream) .ne. hipsuccess) then
516 call neko_error('Device memcpy async (device-to-host) failed')
517 end if
518 else if (dir .eq. device_to_device) then
519 if (hipmemcpyasync(ptr_h, x_d, s, hipmemcpydevicetodevice, stream) &
520 .ne. hipsuccess) then
521 call neko_error('Device memcpy async (device-to-device) failed')
522 end if
523 else
524 call neko_error('Device memcpy failed (invalid direction')
525 end if
526 if (sync_device) then
527 call device_sync_stream(stream)
528 end if
529#elif HAVE_CUDA
530 if (dir .eq. host_to_device) then
531 if (cudamemcpyasync(x_d, ptr_h, s, cudamemcpyhosttodevice, stream) &
532 .ne. cudasuccess) then
533 call neko_error('Device memcpy async (host-to-device) failed')
534 end if
535 else if (dir .eq. device_to_host) then
536 if (cudamemcpyasync(ptr_h, x_d, s, cudamemcpydevicetohost, stream) &
537 .ne. cudasuccess) then
538 call neko_error('Device memcpy async (device-to-host) failed')
539 end if
540 else if (dir .eq. device_to_device) then
541 if (cudamemcpyasync(ptr_h, x_d, s, cudamemcpydevicetodevice, stream) &
542 .ne. cudasuccess) then
543 call neko_error('Device memcpy async (device-to-device) failed')
544 end if
545 else
546 call neko_error('Device memcpy failed (invalid direction')
547 end if
548 if (sync_device) then
549 call device_sync_stream(stream)
550 end if
551#elif HAVE_OPENCL
552 if (sync_device) then
553 if (dir .eq. host_to_device) then
554 if (clenqueuewritebuffer(stream, x_d, cl_true, 0_i8, s, &
555 ptr_h, 0, c_null_ptr, c_null_ptr) &
556 .ne. cl_success) then
557 call neko_error('Device memcpy (host-to-device) failed')
558 end if
559 else if (dir .eq. device_to_host) then
560 if (clenqueuereadbuffer(stream, x_d, cl_true, 0_i8, s, ptr_h, &
561 0, c_null_ptr, c_null_ptr) &
562 .ne. cl_success) then
563 call neko_error('Device memcpy (device-to-host) failed')
564 end if
565 else if (dir .eq. device_to_device) then
566 if (clenqueuecopybuffer(stream, x_d, ptr_h, 0_i8, 0_i8, s, &
567 0, c_null_ptr, c_null_ptr) &
568 .ne. cl_success) then
569 call neko_error('Device memcpy (device-to-device) failed')
570 end if
571 else
572 call neko_error('Device memcpy failed (invalid direction')
573 end if
574 else
575 if (dir .eq. host_to_device) then
576 if (clenqueuewritebuffer(stream, x_d, cl_false, 0_i8, s, &
577 ptr_h, 0, c_null_ptr, c_null_ptr) &
578 .ne. cl_success) then
579 call neko_error('Device memcpy (host-to-device) failed')
580 end if
581 else if (dir .eq. device_to_host) then
582 if (clenqueuereadbuffer(stream, x_d, cl_false, 0_i8, s, ptr_h,&
583 0, c_null_ptr, c_null_ptr) &
584 .ne. cl_success) then
585 call neko_error('Device memcpy (device-to-host) failed')
586 end if
587 else if (dir .eq. device_to_device) then
588 if (clenqueuecopybuffer(stream, x_d, ptr_h, 0_i8, 0_i8, s, &
589 0, c_null_ptr, c_null_ptr) &
590 .ne. cl_success) then
591 call neko_error('Device memcpy (device-to-device) failed')
592 end if
593 else
594 call neko_error('Device memcpy failed (invalid direction')
595 end if
596 end if
597#elif HAVE_METAL
598 ! Copies are synchronous on unified memory, the @a sync_device
599 ! and @a stream arguments have no effect
600 if (dir .eq. host_to_device) then
601 if (metalmemcpyhtod(x_d, ptr_h, s) .ne. metalsuccess) then
602 call neko_error('Device memcpy (host-to-device) failed')
603 end if
604 else if (dir .eq. device_to_host) then
605 if (metalmemcpydtoh(ptr_h, x_d, s) .ne. metalsuccess) then
606 call neko_error('Device memcpy (device-to-host) failed')
607 end if
608 else if (dir .eq. device_to_device) then
609 if (metalmemcpydtod(ptr_h, x_d, s) .ne. metalsuccess) then
610 call neko_error('Device memcpy (device-to-device) failed')
611 end if
612 else
613 call neko_error('Device memcpy failed (invalid direction')
614 end if
615#endif
616 end subroutine device_memcpy_common
617
619 subroutine device_associate_r1(x, x_d, n)
620 class(*), intent(inout), target :: x(:)
621 type(c_ptr), intent(inout) :: x_d
622 integer, intent(in), optional :: n
623 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
624 integer :: n_
625
626 if (present(n)) then
627 n_ = n
628 else
629 n_ = size(x)
630 end if
631
632 if (n_ .eq. 0) return
633 if (.not. c_associated(x_d)) call neko_error('Attempting to associate' // &
634 ' to a null device pointer for a non-empty array')
635
636 select type (x)
637 type is (integer)
638 htbl_ptr_h%ptr = c_loc(x)
639 type is (integer(i8))
640 htbl_ptr_h%ptr = c_loc(x)
641 type is (real)
642 htbl_ptr_h%ptr = c_loc(x)
643 type is (double precision)
644 htbl_ptr_h%ptr = c_loc(x)
645 class default
646 call neko_error('Unknown Fortran type')
647 end select
648
649 htbl_ptr_d%ptr = x_d
650
651 call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
652
653 end subroutine device_associate_r1
654
656 subroutine device_associate_r2(x, x_d, n)
657 class(*), intent(inout), target :: x(:,:)
658 type(c_ptr), intent(inout) :: x_d
659 integer, intent(in), optional :: n
660 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
661 integer :: n_
662
663 if (present(n)) then
664 n_ = n
665 else
666 n_ = size(x)
667 end if
668
669 if (n_ .eq. 0) return
670 if (.not. c_associated(x_d)) call neko_error('Attempting to associate' // &
671 ' to a null device pointer for a non-empty array')
672
673 select type (x)
674 type is (integer)
675 htbl_ptr_h%ptr = c_loc(x)
676 type is (integer(i8))
677 htbl_ptr_h%ptr = c_loc(x)
678 type is (real)
679 htbl_ptr_h%ptr = c_loc(x)
680 type is (double precision)
681 htbl_ptr_h%ptr = c_loc(x)
682 class default
683 call neko_error('Unknown Fortran type')
684 end select
685
686 htbl_ptr_d%ptr = x_d
687
688 call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
689
690 end subroutine device_associate_r2
691
693 subroutine device_associate_r3(x, x_d, n)
694 class(*), intent(inout), target :: x(:,:,:)
695 type(c_ptr), intent(inout) :: x_d
696 integer, intent(in), optional :: n
697 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
698 integer :: n_
699
700 if (present(n)) then
701 n_ = n
702 else
703 n_ = size(x)
704 end if
705
706 if (n_ .eq. 0) return
707 if (.not. c_associated(x_d)) call neko_error('Attempting to associate' // &
708 ' to a null device pointer for a non-empty array')
709 select type (x)
710 type is (integer)
711 htbl_ptr_h%ptr = c_loc(x)
712 type is (integer(i8))
713 htbl_ptr_h%ptr = c_loc(x)
714 type is (real)
715 htbl_ptr_h%ptr = c_loc(x)
716 type is (double precision)
717 htbl_ptr_h%ptr = c_loc(x)
718 class default
719 call neko_error('Unknown Fortran type')
720 end select
721
722 htbl_ptr_d%ptr = x_d
723
724 call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
725
726 end subroutine device_associate_r3
727
729 subroutine device_associate_r4(x, x_d, n)
730 class(*), intent(inout), target :: x(:,:,:,:)
731 type(c_ptr), intent(inout) :: x_d
732 integer, intent(in), optional :: n
733 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
734 integer :: n_
735
736 if (present(n)) then
737 n_ = n
738 else
739 n_ = size(x)
740 end if
741
742 if (n_ .eq. 0) return
743 if (.not. c_associated(x_d)) call neko_error('Attempting to associate' // &
744 ' to a null device pointer for a non-empty array')
745
746 select type (x)
747 type is (integer)
748 htbl_ptr_h%ptr = c_loc(x)
749 type is (integer(i8))
750 htbl_ptr_h%ptr = c_loc(x)
751 type is (real)
752 htbl_ptr_h%ptr = c_loc(x)
753 type is (double precision)
754 htbl_ptr_h%ptr = c_loc(x)
755 class default
756 call neko_error('Unknown Fortran type')
757 end select
758
759 htbl_ptr_d%ptr = x_d
760
761 call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
762
763 end subroutine device_associate_r4
764
767 class(*), intent(inout), target :: x(:)
768 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
769
770 select type (x)
771 type is (integer)
772 htbl_ptr_h%ptr = c_loc(x)
773 type is (integer(i8))
774 htbl_ptr_h%ptr = c_loc(x)
775 type is (real)
776 htbl_ptr_h%ptr = c_loc(x)
777 type is (double precision)
778 htbl_ptr_h%ptr = c_loc(x)
779 class default
780 call neko_error('Unknown Fortran type')
781 end select
782
783 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
784 call device_addrtbl%remove(htbl_ptr_h)
785 end if
786
787 end subroutine device_deassociate_r1
788
791 class(*), intent(inout), target :: x(:,:)
792 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
793
794 select type (x)
795 type is (integer)
796 htbl_ptr_h%ptr = c_loc(x)
797 type is (integer(i8))
798 htbl_ptr_h%ptr = c_loc(x)
799 type is (real)
800 htbl_ptr_h%ptr = c_loc(x)
801 type is (double precision)
802 htbl_ptr_h%ptr = c_loc(x)
803 class default
804 call neko_error('Unknown Fortran type')
805 end select
806
807 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
808 call device_addrtbl%remove(htbl_ptr_h)
809 end if
810
811 end subroutine device_deassociate_r2
812
815 class(*), intent(inout), target :: x(:,:,:)
816 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
817
818 select type (x)
819 type is (integer)
820 htbl_ptr_h%ptr = c_loc(x)
821 type is (integer(i8))
822 htbl_ptr_h%ptr = c_loc(x)
823 type is (real)
824 htbl_ptr_h%ptr = c_loc(x)
825 type is (double precision)
826 htbl_ptr_h%ptr = c_loc(x)
827 class default
828 call neko_error('Unknown Fortran type')
829 end select
830
831 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
832 call device_addrtbl%remove(htbl_ptr_h)
833 end if
834
835 end subroutine device_deassociate_r3
836
839 class(*), intent(inout), target :: x(:,:,:,:)
840 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
841
842 select type (x)
843 type is (integer)
844 htbl_ptr_h%ptr = c_loc(x)
845 type is (integer(i8))
846 htbl_ptr_h%ptr = c_loc(x)
847 type is (real)
848 htbl_ptr_h%ptr = c_loc(x)
849 type is (double precision)
850 htbl_ptr_h%ptr = c_loc(x)
851 class default
852 call neko_error('Unknown Fortran type')
853 end select
854
855 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
856 call device_addrtbl%remove(htbl_ptr_h)
857 end if
858
859 end subroutine device_deassociate_r4
860
862 subroutine device_map_r1(x, x_d, n)
863 integer, intent(in) :: n
864 class(*), intent(inout), target :: x(:)
865 type(c_ptr), intent(inout) :: x_d
866 integer(c_size_t) :: s
867
868 if (c_associated(x_d)) then
869 call neko_error('Device pointer already associated')
870 end if
871
872 select type (x)
873 type is (integer)
874 s = n * int(4, c_size_t)
875 type is (integer(i8))
876 s = n * int(8, c_size_t)
877 type is (real)
878 s = n * int(4, c_size_t)
879 type is (double precision)
880 s = n * int(8, c_size_t)
881 class default
882 call neko_error('Unknown Fortran type')
883 end select
884
885 call device_alloc(x_d, s)
886 call device_associate(x, x_d, n)
887
888 end subroutine device_map_r1
889
891 subroutine device_map_r2(x, x_d, n)
892 integer, intent(in) :: n
893 class(*), intent(inout), target :: x(:,:)
894 type(c_ptr), intent(inout) :: x_d
895 integer(c_size_t) :: s
896
897 if (c_associated(x_d)) then
898 call neko_error('Device pointer already associated')
899 end if
900
901 select type (x)
902 type is (integer)
903 s = n * int(4, c_size_t)
904 type is (integer(i8))
905 s = n * int(8, c_size_t)
906 type is (real)
907 s = n * int(4, c_size_t)
908 type is (double precision)
909 s = n * int(8, c_size_t)
910 class default
911 call neko_error('Unknown Fortran type')
912 end select
913
914 call device_alloc(x_d, s)
915 call device_associate(x, x_d, n)
916
917 end subroutine device_map_r2
918
920 subroutine device_map_r3(x, x_d, n)
921 integer, intent(in) :: n
922 class(*), intent(inout), target :: x(:,:,:)
923 type(c_ptr), intent(inout) :: x_d
924 integer(c_size_t) :: s
925
926 if (c_associated(x_d)) then
927 call neko_error('Device pointer already associated')
928 end if
929
930 select type (x)
931 type is (integer)
932 s = n * int(4, c_size_t)
933 type is (integer(i8))
934 s = n * int(8, c_size_t)
935 type is (real)
936 s = n * int(4, c_size_t)
937 type is (double precision)
938 s = n * int(8, c_size_t)
939 class default
940 call neko_error('Unknown Fortran type')
941 end select
942
943 call device_alloc(x_d, s)
944 call device_associate(x, x_d, n)
945
946 end subroutine device_map_r3
947
949 subroutine device_map_r4(x, x_d, n)
950 integer, intent(in) :: n
951 class(*), intent(inout), target :: x(:,:,:,:)
952 type(c_ptr), intent(inout) :: x_d
953 integer(c_size_t) :: s
954
955 if (c_associated(x_d)) then
956 call neko_error('Device pointer already associated')
957 end if
958
959 select type (x)
960 type is (integer)
961 s = n * int(4, c_size_t)
962 type is (integer(i8))
963 s = n * int(8, c_size_t)
964 type is (real)
965 s = n * int(4, c_size_t)
966 type is (double precision)
967 s = n * int(8, c_size_t)
968 class default
969 call neko_error('Unknown Fortran type')
970 end select
971
972 call device_alloc(x_d, s)
973 call device_associate(x, x_d, n)
974
975 end subroutine device_map_r4
976
978 subroutine device_unmap_r1(x, x_d)
979 class(*), intent(inout), target :: x(:)
980 type(c_ptr), intent(inout) :: x_d
981 type(c_ptr) :: dev
982 logical :: mapped
983
984 ! Whether dev has a non-null address, meaning that x is mapped.
985 mapped = device_associated(x)
986
987 ! Repeated calls to this routine do nothing
988 if ((.not. mapped) .and. (.not. c_associated(x_d))) then
989 return
990 end if
991
992 ! Device pointer associated with x, should be same as x_d if mapped
993 if (mapped) then
994 dev = device_get_ptr(x)
995 else
996 dev = c_null_ptr
997 end if
998
999 ! Error if:
1000 ! 1) x is not mapped to a device pointer, but x_d is not null.
1001 ! 2) x_d is not a valid pointer, but x is mapped to some pointer.
1002 ! 3) x is mapped to a device pointer that is not x_d.
1003 if ((.not. mapped) .or. (.not. c_associated(x_d)) .or. &
1004 (.not. c_associated(dev, x_d))) then
1005 call neko_error('Inconsistent host/device mapping state in ' // &
1006 'device_unmap')
1007 end if
1008
1009 call device_deassociate(x)
1010 call device_free(x_d)
1011
1012 end subroutine device_unmap_r1
1013
1015 subroutine device_unmap_r2(x, x_d)
1016 class(*), intent(inout), target :: x(:,:)
1017 type(c_ptr), intent(inout) :: x_d
1018 type(c_ptr) :: dev
1019 logical :: mapped
1020
1021 ! Whether dev has a non-null address, meaning that x is mapped.
1022 mapped = device_associated(x)
1023
1024 ! Repeated calls to this routine do nothing
1025 if ((.not. mapped) .and. (.not. c_associated(x_d))) then
1026 return
1027 end if
1028
1029 ! Device pointer associated with x, should be same as x_d if mapped
1030 if (mapped) then
1031 dev = device_get_ptr(x)
1032 else
1033 dev = c_null_ptr
1034 end if
1035
1036 ! Error if:
1037 ! 1) x is not mapped to a device pointer, but x_d is not null.
1038 ! 2) x_d is not a valid pointer, but x is mapped to some pointer.
1039 ! 3) x is mapped to a device pointer that is not x_d.
1040 if ((.not. mapped) .or. (.not. c_associated(x_d)) .or. &
1041 (.not. c_associated(dev, x_d))) then
1042 call neko_error('Inconsistent host/device mapping state in ' // &
1043 'device_unmap')
1044 end if
1045
1046 call device_deassociate(x)
1047 call device_free(x_d)
1048
1049 end subroutine device_unmap_r2
1050
1052 subroutine device_unmap_r3(x, x_d)
1053 class(*), intent(inout), target :: x(:,:,:)
1054 type(c_ptr), intent(inout) :: x_d
1055 type(c_ptr) :: dev
1056 logical :: mapped
1057
1058 ! Whether dev has a non-null address, meaning that x is mapped.
1059 mapped = device_associated(x)
1060
1061 ! Repeated calls to this routine do nothing
1062 if ((.not. mapped) .and. (.not. c_associated(x_d))) then
1063 return
1064 end if
1065
1066 ! Device pointer associated with x, should be same as x_d if mapped
1067 if (mapped) then
1068 dev = device_get_ptr(x)
1069 else
1070 dev = c_null_ptr
1071 end if
1072
1073 ! Error if:
1074 ! 1) x is not mapped to a device pointer, but x_d is not null.
1075 ! 2) x_d is not a valid pointer, but x is mapped to some pointer.
1076 ! 3) x is mapped to a device pointer that is not x_d.
1077 if ((.not. mapped) .or. (.not. c_associated(x_d)) .or. &
1078 (.not. c_associated(dev, x_d))) then
1079 call neko_error('Inconsistent host/device mapping state in ' // &
1080 'device_unmap')
1081 end if
1082
1083 call device_deassociate(x)
1084 call device_free(x_d)
1085
1086 end subroutine device_unmap_r3
1087
1089 subroutine device_unmap_r4(x, x_d)
1090 class(*), intent(inout), target :: x(:,:,:,:)
1091 type(c_ptr), intent(inout) :: x_d
1092 type(c_ptr) :: dev
1093 logical :: mapped
1094
1095 ! Whether dev has a non-null address, meaning that x is mapped.
1096 mapped = device_associated(x)
1097
1098 ! Repeated calls to this routine do nothing
1099 if ((.not. mapped) .and. (.not. c_associated(x_d))) then
1100 return
1101 end if
1102
1103 ! Device pointer associated with x, should be same as x_d if mapped
1104 if (mapped) then
1105 dev = device_get_ptr(x)
1106 else
1107 dev = c_null_ptr
1108 end if
1109
1110 ! Error if:
1111 ! 1) x is not mapped to a device pointer, but x_d is not null.
1112 ! 2) x_d is not a valid pointer, but x is mapped to some pointer.
1113 ! 3) x is mapped to a device pointer that is not x_d.
1114 if ((.not. mapped) .or. (.not. c_associated(x_d)) .or. &
1115 (.not. c_associated(dev, x_d))) then
1116 call neko_error('Inconsistent host/device mapping state in ' // &
1117 'device_unmap')
1118 end if
1119
1120 call device_deassociate(x)
1121 call device_free(x_d)
1122
1123 end subroutine device_unmap_r4
1124
1126 function device_associated_r1(x) result(assoc)
1127 class(*), intent(inout), target :: x(:)
1128 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1129 logical :: assoc
1130
1131 select type (x)
1132 type is (integer)
1133 htbl_ptr_h%ptr = c_loc(x)
1134 type is (integer(i8))
1135 htbl_ptr_h%ptr = c_loc(x)
1136 type is (real)
1137 htbl_ptr_h%ptr = c_loc(x)
1138 type is (double precision)
1139 htbl_ptr_h%ptr = c_loc(x)
1140 class default
1141 call neko_error('Unknown Fortran type')
1142 end select
1143
1144 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1145 assoc = .true.
1146 else
1147 assoc = .false.
1148 end if
1149
1150 end function device_associated_r1
1151
1153 function device_associated_r2(x) result(assoc)
1154 class(*), intent(inout), target :: x(:,:)
1155 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1156 logical :: assoc
1157
1158 select type (x)
1159 type is (integer)
1160 htbl_ptr_h%ptr = c_loc(x)
1161 type is (integer(i8))
1162 htbl_ptr_h%ptr = c_loc(x)
1163 type is (real)
1164 htbl_ptr_h%ptr = c_loc(x)
1165 type is (double precision)
1166 htbl_ptr_h%ptr = c_loc(x)
1167 class default
1168 call neko_error('Unknown Fortran type')
1169 end select
1170
1171 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1172 assoc = .true.
1173 else
1174 assoc = .false.
1175 end if
1176
1177 end function device_associated_r2
1178
1180 function device_associated_r3(x) result(assoc)
1181 class(*), intent(inout), target :: x(:,:,:)
1182 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1183 logical :: assoc
1184
1185 select type (x)
1186 type is (integer)
1187 htbl_ptr_h%ptr = c_loc(x)
1188 type is (integer(i8))
1189 htbl_ptr_h%ptr = c_loc(x)
1190 type is (real)
1191 htbl_ptr_h%ptr = c_loc(x)
1192 type is (double precision)
1193 htbl_ptr_h%ptr = c_loc(x)
1194 class default
1195 call neko_error('Unknown Fortran type')
1196 end select
1197
1198 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1199 assoc = .true.
1200 else
1201 assoc = .false.
1202 end if
1203
1204 end function device_associated_r3
1205
1207 function device_associated_r4(x) result(assoc)
1208 class(*), intent(inout), target :: x(:,:,:,:)
1209 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1210 logical :: assoc
1211
1212 select type (x)
1213 type is (integer)
1214 htbl_ptr_h%ptr = c_loc(x)
1215 type is (integer(i8))
1216 htbl_ptr_h%ptr = c_loc(x)
1217 type is (real)
1218 htbl_ptr_h%ptr = c_loc(x)
1219 type is (double precision)
1220 htbl_ptr_h%ptr = c_loc(x)
1221 class default
1222 call neko_error('Unknown Fortran type')
1223 end select
1224
1225 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1226 assoc = .true.
1227 else
1228 assoc = .false.
1229 end if
1230
1231 end function device_associated_r4
1232
1235 class(*), intent(in), target :: x(:)
1236 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1237 type(c_ptr) :: device_get_ptr_r1
1238
1239 device_get_ptr_r1 = c_null_ptr
1240
1241 select type (x)
1242 type is (integer)
1243 htbl_ptr_h%ptr = c_loc(x)
1244 type is (integer(i8))
1245 htbl_ptr_h%ptr = c_loc(x)
1246 type is (real)
1247 htbl_ptr_h%ptr = c_loc(x)
1248 type is (double precision)
1249 htbl_ptr_h%ptr = c_loc(x)
1250 class default
1251 call neko_error('Unknown Fortran type')
1252 end select
1253
1254 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1255 device_get_ptr_r1 = htbl_ptr_d%ptr
1256 else
1257 call neko_error('Array not associated with device')
1258 end if
1259 end function device_get_ptr_r1
1260
1263 class(*), intent(in), target :: x(:,:)
1264 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1265 type(c_ptr) :: device_get_ptr_r2
1266
1267 device_get_ptr_r2 = c_null_ptr
1268
1269 select type (x)
1270 type is (integer)
1271 htbl_ptr_h%ptr = c_loc(x)
1272 type is (integer(i8))
1273 htbl_ptr_h%ptr = c_loc(x)
1274 type is (real)
1275 htbl_ptr_h%ptr = c_loc(x)
1276 type is (double precision)
1277 htbl_ptr_h%ptr = c_loc(x)
1278 class default
1279 call neko_error('Unknown Fortran type')
1280 end select
1281
1282 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1283 device_get_ptr_r2 = htbl_ptr_d%ptr
1284 else
1285 call neko_error('Array not associated with device')
1286 end if
1287 end function device_get_ptr_r2
1288
1291 class(*), intent(in), target :: x(:,:,:)
1292 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1293 type(c_ptr) :: device_get_ptr_r3
1294
1295 device_get_ptr_r3 = c_null_ptr
1296
1297 select type (x)
1298 type is (integer)
1299 htbl_ptr_h%ptr = c_loc(x)
1300 type is (integer(i8))
1301 htbl_ptr_h%ptr = c_loc(x)
1302 type is (real)
1303 htbl_ptr_h%ptr = c_loc(x)
1304 type is (double precision)
1305 htbl_ptr_h%ptr = c_loc(x)
1306 class default
1307 call neko_error('Unknown Fortran type')
1308 end select
1309
1310 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1311 device_get_ptr_r3 = htbl_ptr_d%ptr
1312 else
1313 call neko_error('Array not associated with device')
1314 end if
1315 end function device_get_ptr_r3
1316
1319 class(*), intent(in), target :: x(:,:,:,:)
1320 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1321 type(c_ptr) :: device_get_ptr_r4
1322
1323 device_get_ptr_r4 = c_null_ptr
1324
1325 select type (x)
1326 type is (integer)
1327 htbl_ptr_h%ptr = c_loc(x)
1328 type is (integer(i8))
1329 htbl_ptr_h%ptr = c_loc(x)
1330 type is (real)
1331 htbl_ptr_h%ptr = c_loc(x)
1332 type is (double precision)
1333 htbl_ptr_h%ptr = c_loc(x)
1334 class default
1335 call neko_error('Unknown Fortran type')
1336 end select
1337
1338 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1339 device_get_ptr_r4 = htbl_ptr_d%ptr
1340 else
1341 call neko_error('Array not associated with device')
1342 end if
1343 end function device_get_ptr_r4
1344
1347#ifdef HAVE_HIP
1348 if (hipdevicesynchronize() .ne. hipsuccess) then
1349 call neko_error('Error during device sync')
1350 end if
1351#elif HAVE_CUDA
1352 if (cudadevicesynchronize() .ne. cudasuccess) then
1353 call neko_error('Error during device sync')
1354 end if
1355#elif HAVE_OPENCL
1356 if (clfinish(glb_cmd_queue) .ne. cl_success) then
1357 call neko_error('Error during device sync')
1358 end if
1359#elif HAVE_METAL
1360 if (metaldevicesynchronize() .ne. metalsuccess) then
1361 call neko_error('Error during device sync')
1362 end if
1363#endif
1364 end subroutine device_sync_device
1365
1367 subroutine device_sync_stream(stream)
1368 type(c_ptr), intent(in) :: stream
1369#ifdef HAVE_HIP
1370 if (hipstreamsynchronize(stream) .ne. hipsuccess) then
1371 call neko_error('Error during stream sync')
1372 end if
1373#elif HAVE_CUDA
1374 if (cudastreamsynchronize(stream) .ne. cudasuccess) then
1375 call neko_error('Error during stream sync')
1376 end if
1377#elif HAVE_OPENCL
1378 if (clfinish(stream) .ne. cl_success) then
1379 call neko_error('Error during stream sync')
1380 end if
1381#elif HAVE_METAL
1382 if (metalstreamsynchronize(stream) .ne. metalsuccess) then
1383 call neko_error('Error during stream sync')
1384 end if
1385#endif
1386 end subroutine device_sync_stream
1387
1389 subroutine device_stream_create(stream, flags)
1390 type(c_ptr), intent(inout) :: stream
1391 integer, optional :: flags
1392 integer :: ierr
1393#ifdef HAVE_HIP
1394 if (present(flags)) then
1395 if (hipstreamcreatewithflags(stream, flags) .ne. hipsuccess) then
1396 call neko_error('Error during stream create (w. flags)')
1397 end if
1398 else
1399 if (hipstreamcreate(stream) .ne. hipsuccess) then
1400 call neko_error('Error during stream create')
1401 end if
1402 end if
1403#elif HAVE_CUDA
1404 if (present(flags)) then
1405 if (cudastreamcreatewithflags(stream, flags) .ne. cudasuccess) then
1406 call neko_error('Error during stream create (w. flags)')
1407 end if
1408 else
1409 if (cudastreamcreate(stream) .ne. cudasuccess) then
1410 call neko_error('Error during stream create')
1411 end if
1412 end if
1413#elif HAVE_OPENCL
1414 stream = clcreatecommandqueue(glb_ctx, glb_device_id, 0_i8, ierr)
1415 if (ierr .ne. cl_success) then
1416 call neko_error('Error during stream create')
1417 end if
1418#elif HAVE_METAL
1419 if (metalstreamcreate(stream) .ne. metalsuccess) then
1420 call neko_error('Error during stream create')
1421 end if
1422#endif
1423 end subroutine device_stream_create
1424
1426 subroutine device_stream_create_with_priority(stream, flags, prio)
1427 type(c_ptr), intent(inout) :: stream
1428 integer, intent(in) :: flags, prio
1429#ifdef HAVE_HIP
1430 if (hipstreamcreatewithpriority(stream, flags, prio) .ne. hipsuccess) then
1431 call neko_error('Error during stream create (w. priority)')
1432 end if
1433#elif HAVE_CUDA
1434 if (cudastreamcreatewithpriority(stream, flags, prio) .ne. cudasuccess) then
1435 call neko_error('Error during stream create (w. priority)')
1436 end if
1437#elif HAVE_OPENCL
1438 call neko_error('Not implemented yet')
1439#elif HAVE_METAL
1440 ! Metal command queues have no priority, create a plain queue
1441 if (metalstreamcreate(stream) .ne. metalsuccess) then
1442 call neko_error('Error during stream create (w. priority)')
1443 end if
1444#endif
1446
1448 subroutine device_stream_destroy(stream)
1449 type(c_ptr), intent(inout) :: stream
1450#ifdef HAVE_HIP
1451 if (hipstreamdestroy(stream) .ne. hipsuccess) then
1452 call neko_error('Error during stream destroy')
1453 end if
1454#elif HAVE_CUDA
1455 if (cudastreamdestroy(stream) .ne. cudasuccess) then
1456 call neko_error('Error during stream destroy')
1457 end if
1458#elif HAVE_OPENCL
1459 if (clreleasecommandqueue(stream) .ne. cl_success) then
1460 call neko_error('Error during stream destroy')
1461 end if
1462#elif HAVE_METAL
1463 if (metalstreamdestroy(stream) .ne. metalsuccess) then
1464 call neko_error('Error during stream destroy')
1465 end if
1466#endif
1467 end subroutine device_stream_destroy
1468
1470 subroutine device_stream_wait_event(stream, event, flags)
1471 type(c_ptr), intent(in) :: stream
1472 type(c_ptr), target, intent(in) :: event
1473 integer :: flags
1474#ifdef HAVE_HIP
1475 if (hipstreamwaitevent(stream, event, flags) .ne. hipsuccess) then
1476 call neko_error('Error during stream sync')
1477 end if
1478#elif HAVE_CUDA
1479 if (cudastreamwaitevent(stream, event, flags) .ne. cudasuccess) then
1480 call neko_error('Error during stream sync')
1481 end if
1482#elif HAVE_OPENCL
1483 if (clenqueuebarrier(stream) .ne. cl_success) then
1484 call neko_error('Error during barrier')
1485 end if
1486 if (clenqueuewaitforevents(stream, 1, c_loc(event)) .ne. cl_success) then
1487 call neko_error('Error during stream sync')
1488 end if
1489#elif HAVE_METAL
1490 if (metalstreamwaitevent(stream, event) .ne. metalsuccess) then
1491 call neko_error('Error during stream sync')
1492 end if
1493#endif
1494 end subroutine device_stream_wait_event
1495
1498#if HAVE_CUDA
1499 if (cudaprofilerstart() .ne. cudasuccess) then
1500 call neko_error('Error starting profiler')
1501 end if
1502#endif
1503 end subroutine device_profiler_start
1504
1507#if HAVE_CUDA
1508 if (cudaprofilerstop() .ne. cudasuccess) then
1509 call neko_error('Error stopping profiler')
1510 end if
1511#endif
1512 end subroutine device_profiler_stop
1513
1515 subroutine device_event_create(event, flags)
1516 type(c_ptr), intent(inout) :: event
1517 integer, optional :: flags
1518 integer :: ierr
1519#ifdef HAVE_HIP
1520 if (present(flags)) then
1521 if (hipeventcreatewithflags(event, flags) .ne. hipsuccess) then
1522 call neko_error('Error during event create (w. flags)')
1523 end if
1524 else
1525 if (hipeventcreate(event) .ne. hipsuccess) then
1526 call neko_error('Error during event create')
1527 end if
1528 end if
1529#elif HAVE_CUDA
1530 if (present(flags)) then
1531 if (cudaeventcreatewithflags(event, flags) .ne. cudasuccess) then
1532 call neko_error('Error during event create (w. flags)')
1533 end if
1534 else
1535 if (cudaeventcreate(event) .ne. cudasuccess) then
1536 call neko_error('Error during event create')
1537 end if
1538 end if
1539#elif HAVE_OPENCL
1540 event = c_null_ptr
1541#elif HAVE_METAL
1542 if (metaleventcreate(event) .ne. metalsuccess) then
1543 call neko_error('Error during event create')
1544 end if
1545#endif
1546 end subroutine device_event_create
1547
1549 subroutine device_event_destroy(event)
1550 type(c_ptr), intent(inout) :: event
1551#ifdef HAVE_HIP
1552 if (hipeventdestroy(event) .ne. hipsuccess) then
1553 call neko_error('Error during event destroy')
1554 end if
1555#elif HAVE_CUDA
1556 if (cudaeventdestroy(event) .ne. cudasuccess) then
1557 call neko_error('Error during event destroy')
1558 end if
1559#elif HAVE_OPENCL
1560 event = c_null_ptr
1561#elif HAVE_METAL
1562 if (metaleventdestroy(event) .ne. metalsuccess) then
1563 call neko_error('Error during event destroy')
1564 end if
1565 event = c_null_ptr
1566#endif
1567 end subroutine device_event_destroy
1568
1570 subroutine device_event_record(event, stream)
1571 type(c_ptr), target, intent(in) :: event
1572 type(c_ptr), intent(in) :: stream
1573#ifdef HAVE_HIP
1574 if (hipeventrecord(event, stream) .ne. hipsuccess) then
1575 call neko_error('Error recording an event')
1576 end if
1577#elif HAVE_CUDA
1578 if (cudaeventrecord(event, stream) .ne. cudasuccess) then
1579 call neko_error('Error recording an event')
1580 end if
1581#elif HAVE_OPENCL
1582 if (clenqueuemarker(stream, c_loc(event)) .ne. cl_success) then
1583 call neko_error('Error recording an event')
1584 end if
1585#elif HAVE_METAL
1586 if (metaleventrecord(event, stream) .ne. metalsuccess) then
1587 call neko_error('Error recording an event')
1588 end if
1589#endif
1590 end subroutine device_event_record
1591
1593 subroutine device_event_sync(event)
1594 type(c_ptr), target, intent(in) :: event
1595#ifdef HAVE_HIP
1596 if (hipeventsynchronize(event) .ne. hipsuccess) then
1597 call neko_error('Error during event sync')
1598 end if
1599#elif HAVE_CUDA
1600 if (cudaeventsynchronize(event) .ne. cudasuccess) then
1601 call neko_error('Error during event sync')
1602 end if
1603#elif HAVE_OPENCL
1604 if (c_associated(event)) then
1605 if (clwaitforevents(1, c_loc(event)) .ne. cl_success) then
1606 call neko_error('Error during event sync')
1607 end if
1608 end if
1609#elif HAVE_METAL
1610 if (c_associated(event)) then
1611 if (metaleventsynchronize(event) .ne. metalsuccess) then
1612 call neko_error('Error during event sync')
1613 end if
1614 end if
1615#endif
1616 end subroutine device_event_sync
1617
1618end module device
double real
Associate a Fortran array to a (allocated) device pointer.
Definition device.F90:90
Check if a Fortran array is assoicated with a device pointer.
Definition device.F90:96
Deassociate a Fortran array from a device pointer.
Definition device.F90:102
Return the device pointer for an associated Fortran array.
Definition device.F90:108
Map a Fortran array to a device (allocate and associate)
Definition device.F90:78
Copy data between host and device (or device and device)
Definition device.F90:72
Synchronize a device or stream.
Definition device.F90:114
Unmap a Fortran array from a device (deassociate and free)
Definition device.F90:84
Fortran CUDA interface.
Definition cuda_intf.F90:34
subroutine cuda_device_name(name)
subroutine cuda_finalize(glb_cmd_queue, aux_cmd_queue)
@ cudamemcpydevicetohost
Definition cuda_intf.F90:54
@ cudamemcpydevicetodevice
Definition cuda_intf.F90:55
@ cudamemcpyhosttodevice
Definition cuda_intf.F90:53
integer function cuda_device_count()
Return the number of avaialble CUDA devices.
subroutine cuda_init(glb_cmd_queue, aux_cmd_queue, strm_high_prio, strm_low_prio)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_record(event, stream)
Record a device event.
Definition device.F90:1571
subroutine device_associate_r4(x, x_d, n)
Associate a Fortran rank 4 array to a (allocated) device pointer.
Definition device.F90:730
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1594
subroutine device_associate_r1(x, x_d, n)
Associate a Fortran rank 1 array to a (allocated) device pointer.
Definition device.F90:620
subroutine, public device_finalize
Definition device.F90:160
integer, parameter, public device_to_device
Definition device.F90:48
type(c_ptr) function device_get_ptr_r4(x)
Return the device pointer for an associated Fortran rank 4 array.
Definition device.F90:1319
type(c_ptr) function device_get_ptr_r1(x)
Return the device pointer for an associated Fortran rank 1 array.
Definition device.F90:1235
integer, public strm_low_prio
Low priority stream setting.
Definition device.F90:69
integer, parameter, public host_to_device
Definition device.F90:48
subroutine device_map_r3(x, x_d, n)
Map a Fortran rank 3 array to a device (allocate and associate)
Definition device.F90:921
type(c_ptr), bind(C), public prf_cmd_queue
Profiling command queue.
Definition device.F90:59
subroutine, private device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
Copy data between host and device.
Definition device.F90:493
logical function device_associated_r3(x)
Check if a Fortran rank 3 array is assoicated with a device pointer.
Definition device.F90:1181
subroutine device_unmap_r4(x, x_d)
Unmap a Fortran rank 4 array from a device (deassociate and free)
Definition device.F90:1090
type(htable_cptr_t), private device_addrtbl
Table of host to device address mappings.
Definition device.F90:119
subroutine, public device_profiler_stop()
Stop device profiling.
Definition device.F90:1507
subroutine device_deassociate_r3(x)
Deassociate a Fortran rank 3 array from a device pointer.
Definition device.F90:815
subroutine, public device_sync_stream(stream)
Synchronize a device stream.
Definition device.F90:1368
type(c_ptr) function device_get_ptr_r3(x)
Return the device pointer for an associated Fortran rank 3 array.
Definition device.F90:1291
subroutine device_unmap_r2(x, x_d)
Unmap a Fortran rank 2 array from a device (deassociate and free)
Definition device.F90:1016
subroutine, public device_profiler_start()
Start device profiling.
Definition device.F90:1498
subroutine device_map_r2(x, x_d, n)
Map a Fortran rank 2 array to a device (allocate and associate)
Definition device.F90:892
subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 2 arrays)
Definition device.F90:348
subroutine device_map_r4(x, x_d, n)
Map a Fortran rank 4 array to a device (allocate and associate)
Definition device.F90:950
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:240
integer, parameter, public device_to_host
Definition device.F90:48
subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
Copy data between host and device (or device and device) (c-pointers)
Definition device.F90:464
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1550
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:209
subroutine device_associate_r2(x, x_d, n)
Associate a Fortran rank 2 array to a (allocated) device pointer.
Definition device.F90:657
subroutine, public device_stream_create_with_priority(stream, flags, prio)
Create a device stream/command queue with priority.
Definition device.F90:1427
subroutine, public device_stream_create(stream, flags)
Create a device stream/command queue.
Definition device.F90:1390
subroutine device_deassociate_r4(x)
Deassociate a Fortran rank 4 array from a device pointer.
Definition device.F90:839
subroutine device_sync_device()
Synchronize the device.
Definition device.F90:1347
subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 4 arrays)
Definition device.F90:424
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
Definition device.F90:1471
subroutine device_map_r1(x, x_d, n)
Map a Fortran rank 1 array to a device (allocate and associate)
Definition device.F90:863
subroutine device_associate_r3(x, x_d, n)
Associate a Fortran rank 3 array to a (allocated) device pointer.
Definition device.F90:694
subroutine device_unmap_r1(x, x_d)
Unmap a Fortran rank 1 array from a device (deassociate and free)
Definition device.F90:979
subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 1 arrays)
Definition device.F90:310
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Definition device.F90:52
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1516
integer function, public device_count()
Return the number of available devices.
Definition device.F90:194
subroutine, public device_name(name)
Definition device.F90:179
logical function device_associated_r4(x)
Check if a Fortran rank 4 array is assoicated with a device pointer.
Definition device.F90:1208
logical function device_associated_r2(x)
Check if a Fortran rank 2 array is assoicated with a device pointer.
Definition device.F90:1154
integer, public strm_high_prio
High priority stream setting.
Definition device.F90:66
type(c_ptr), bind(C), public aux_cmd_queue
Aux command queue.
Definition device.F90:55
type(c_ptr) function device_get_ptr_r2(x)
Return the device pointer for an associated Fortran rank 2 array.
Definition device.F90:1263
subroutine device_unmap_r3(x, x_d)
Unmap a Fortran rank 3 array from a device (deassociate and free)
Definition device.F90:1053
subroutine device_deassociate_r1(x)
Deassociate a Fortran rank 1 array from a device pointer.
Definition device.F90:767
type(c_ptr), bind(C), public glb_cmd_event
Event for the global command queue.
Definition device.F90:63
subroutine device_deassociate_r2(x)
Deassociate a Fortran rank 2 array from a device pointer.
Definition device.F90:791
subroutine, public device_init
Definition device.F90:135
logical function device_associated_r1(x)
Check if a Fortran rank 1 array is assoicated with a device pointer.
Definition device.F90:1127
subroutine, public device_memset(x_d, v, s, sync, strm)
Set memory on the device to a value.
Definition device.F90:263
subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 3 arrays)
Definition device.F90:386
subroutine, public device_stream_destroy(stream)
Destroy a device stream/command queue.
Definition device.F90:1449
Fortran HIP interface.
Definition hip_intf.F90:34
subroutine hip_device_name(name)
Definition hip_intf.F90:282
@ hipmemcpydevicetohost
Definition hip_intf.F90:72
@ hipmemcpydevicetodevice
Definition hip_intf.F90:73
@ hipmemcpyhosttodevice
Definition hip_intf.F90:71
subroutine hip_init(glb_cmd_queue, aux_cmd_queue, strm_high_prio, strm_low_prio)
Definition hip_intf.F90:242
subroutine hip_finalize(glb_cmd_queue, aux_cmd_queue)
Definition hip_intf.F90:264
integer function hip_device_count()
Return the number of available HIP devices.
Definition hip_intf.F90:299
Implements a hash table ADT.
Definition htable.f90:52
Fortran interface to the Metal device layer (see device/metal/metal.m)
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public i8
Definition num_types.f90:7
Fortran OpenCL interface.
subroutine opencl_device_name(name)
subroutine opencl_finalize(glb_cmd_queue, aux_cmd_queue, prf_cmd_queue)
integer function opencl_device_count()
Return the number of OpenCL devices.
subroutine opencl_init(glb_cmd_queue, aux_cmd_queue, prf_cmd_queue)
OpenCL JIT program library.
Definition prgm_lib.F90:2
subroutine, public opencl_prgm_lib_release
Definition prgm_lib.F90:134
Utilities.
Definition utils.f90:35
C pointer based hash table.
Definition htable.f90:162