Loading [MathJax]/extensions/tex2jax.js
Neko 0.9.99
A portable framework for high-order spectral element flow simulations
All Classes Namespaces Files Functions Variables Typedefs Enumerator Macros Pages
device.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2025, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module device
35 use num_types, only : i8
36 use opencl_intf
37 use cuda_intf
38 use hip_intf
39 use htable, only : htable_cptr_t, h_cptr_t
40 use utils, only : neko_error
42 use, intrinsic :: iso_c_binding
43 implicit none
44 private
45
46 integer, public, parameter :: host_to_device = 1, device_to_host = 2, &
48
50 type(c_ptr), public, bind(c) :: glb_cmd_queue = c_null_ptr
51
53 type(c_ptr), public, bind(c) :: aux_cmd_queue = c_null_ptr
54
56 type(c_ptr), public, bind(c) :: glb_cmd_event
57
59 integer, public :: strm_high_prio
60
62 integer, public :: strm_low_prio
63
68 end interface device_memcpy
69
71 interface device_map
72 module procedure device_map_r1, device_map_r2, &
74 end interface device_map
75
80 end interface device_associate
81
86 end interface device_associated
87
92 end interface device_deassociate
93
98 end interface device_get_ptr
99
101 interface device_sync
102 module procedure device_sync_device, device_sync_stream
103 end interface device_sync
104
107
116
117 private :: device_memcpy_common
118
119contains
120
121 subroutine device_init
122#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
123 call device_addrtbl%init(64)
124
125#ifdef HAVE_HIP
127#elif HAVE_CUDA
129#elif HAVE_OPENCL
131#endif
133#endif
134 end subroutine device_init
135
137#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
138 call device_addrtbl%free()
139
140#ifdef HAVE_HIP
142#elif HAVE_CUDA
144#elif HAVE_OPENCL
147#endif
149#endif
150 end subroutine device_finalize
151
152 subroutine device_name(name)
153 character(len=*), intent(inout) :: name
154
155#ifdef HAVE_HIP
156 call hip_device_name(name)
157#elif HAVE_CUDA
158 call cuda_device_name(name)
159#elif HAVE_OPENCL
160 call opencl_device_name(name)
161#endif
162 end subroutine device_name
163
165 integer function device_count()
166#ifdef HAVE_HIP
168#elif HAVE_CUDA
170#elif HAVE_OPENCL
172#else
173 device_count = 0
174#endif
175 end function device_count
176
178 subroutine device_alloc(x_d, s)
179 type(c_ptr), intent(inout) :: x_d
180 integer(c_size_t) :: s
181 integer :: ierr
182#ifdef HAVE_HIP
183 if (hipmalloc(x_d, s) .ne. hipsuccess) then
184 call neko_error('Memory allocation on device failed')
185 end if
186#elif HAVE_CUDA
187 if (cudamalloc(x_d, s) .ne. cudasuccess) then
188 call neko_error('Memory allocation on device failed')
189 end if
190#elif HAVE_OPENCL
191 x_d = clcreatebuffer(glb_ctx, cl_mem_read_write, s, c_null_ptr, ierr)
192 if (ierr .ne. cl_success) then
193 call neko_error('Memory allocation on device failed')
194 end if
195#endif
196 end subroutine device_alloc
197
199 subroutine device_free(x_d)
200 type(c_ptr), intent(inout) :: x_d
201#ifdef HAVE_HIP
202 if (hipfree(x_d) .ne. hipsuccess) then
203 call neko_error('Memory deallocation on device failed')
204 end if
205#elif HAVE_CUDA
206 if (cudafree(x_d) .ne. cudasuccess) then
207 call neko_error('Memory deallocation on device failed')
208 end if
209#elif HAVE_OPENCL
210 if (clreleasememobject(x_d) .ne. cl_success) then
211 call neko_error('Memory deallocation on device failed')
212 end if
213#endif
214 x_d = c_null_ptr
215 end subroutine device_free
216
218 subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
219 integer, intent(in) :: n
220 class(*), intent(inout), target :: x(:)
221 type(c_ptr), intent(inout) :: x_d
222 integer, intent(in), value :: dir
223 logical :: sync
224 type(c_ptr), optional :: strm
225 type(c_ptr) :: ptr_h, copy_stream
226 integer(c_size_t) :: s
227
228 if (present(strm)) then
229 copy_stream = strm
230 else
231 copy_stream = glb_cmd_queue
232 end if
233
234 select type (x)
235 type is (integer)
236 s = n * 4
237 ptr_h = c_loc(x)
238 type is (integer(i8))
239 s = n * 8
240 ptr_h = c_loc(x)
241 type is (real)
242 s = n * 4
243 ptr_h = c_loc(x)
244 type is (double precision)
245 s = n * 8
246 ptr_h = c_loc(x)
247 class default
248 call neko_error('Unknown Fortran type')
249 end select
250
251 call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
252
253 end subroutine device_memcpy_r1
254
256 subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
257 integer, intent(in) :: n
258 class(*), intent(inout), target :: x(:,:)
259 type(c_ptr), intent(inout) :: x_d
260 integer, intent(in), value :: dir
261 logical :: sync
262 type(c_ptr), optional :: strm
263 type(c_ptr) :: ptr_h, copy_stream
264 integer(c_size_t) :: s
265
266 if (present(strm)) then
267 copy_stream = strm
268 else
269 copy_stream = glb_cmd_queue
270 end if
271
272 select type (x)
273 type is (integer)
274 s = n * 4
275 ptr_h = c_loc(x)
276 type is (integer(i8))
277 s = n * 8
278 ptr_h = c_loc(x)
279 type is (real)
280 s = n * 4
281 ptr_h = c_loc(x)
282 type is (double precision)
283 s = n * 8
284 ptr_h = c_loc(x)
285 class default
286 call neko_error('Unknown Fortran type')
287 end select
288
289 call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
290
291 end subroutine device_memcpy_r2
292
294 subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
295 integer, intent(in) :: n
296 class(*), intent(inout), target :: x(:,:,:)
297 type(c_ptr), intent(inout) :: x_d
298 integer, intent(in), value :: dir
299 logical :: sync
300 type(c_ptr), optional :: strm
301 type(c_ptr) :: ptr_h, copy_stream
302 integer(c_size_t) :: s
303
304 if (present(strm)) then
305 copy_stream = strm
306 else
307 copy_stream = glb_cmd_queue
308 end if
309
310 select type (x)
311 type is (integer)
312 s = n * 4
313 ptr_h = c_loc(x)
314 type is (integer(i8))
315 s = n * 8
316 ptr_h = c_loc(x)
317 type is (real)
318 s = n * 4
319 ptr_h = c_loc(x)
320 type is (double precision)
321 s = n * 8
322 ptr_h = c_loc(x)
323 class default
324 call neko_error('Unknown Fortran type')
325 end select
326
327 call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
328
329 end subroutine device_memcpy_r3
330
332 subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
333 integer, intent(in) :: n
334 class(*), intent(inout), target :: x(:,:,:,:)
335 type(c_ptr), intent(inout) :: x_d
336 integer, intent(in), value :: dir
337 logical :: sync
338 type(c_ptr), optional :: strm
339 type(c_ptr) :: ptr_h, copy_stream
340 integer(c_size_t) :: s
341
342 if (present(strm)) then
343 copy_stream = strm
344 else
345 copy_stream = glb_cmd_queue
346 end if
347
348 select type (x)
349 type is (integer)
350 s = n * 4
351 ptr_h = c_loc(x)
352 type is (integer(i8))
353 s = n * 8
354 ptr_h = c_loc(x)
355 type is (real)
356 s = n * 4
357 ptr_h = c_loc(x)
358 type is (double precision)
359 s = n * 8
360 ptr_h = c_loc(x)
361 class default
362 call neko_error('Unknown Fortran type')
363 end select
364
365 call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
366
367 end subroutine device_memcpy_r4
368
372 subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
373 type(c_ptr), intent(inout) :: dst
374 type(c_ptr), intent(inout) :: src
375 integer(c_size_t), intent(in) :: s
376 integer, intent(in), value :: dir
377 logical, optional :: sync
378 type(c_ptr), optional :: strm
379 type(c_ptr) :: copy_stream
380 logical :: sync_device
381
382 if (present(sync)) then
383 sync_device = sync
384 else
385 sync_device = .false.
386 end if
387
388 if (present(strm)) then
389 copy_stream = strm
390 else
391 copy_stream = glb_cmd_queue
392 end if
393
394 call device_memcpy_common(dst, src, s, dir, sync_device, copy_stream)
395
396 end subroutine device_memcpy_cptr
397
401 subroutine device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
402 type(c_ptr), intent(inout) :: ptr_h
403 type(c_ptr), intent(inout) :: x_d
404 integer(c_size_t), intent(in) :: s
405 integer, intent(in), value :: dir
406 logical, intent(in) :: sync_device
407 type(c_ptr), intent(inout) :: stream
408#ifdef HAVE_HIP
409 if (dir .eq. host_to_device) then
410 if (hipmemcpyasync(x_d, ptr_h, s, &
411 hipmemcpyhosttodevice, stream) .ne. hipsuccess) then
412 call neko_error('Device memcpy async (host-to-device) failed')
413 end if
414 else if (dir .eq. device_to_host) then
415 if (hipmemcpyasync(ptr_h, x_d, s, &
416 hipmemcpydevicetohost, stream) .ne. hipsuccess) then
417 call neko_error('Device memcpy async (device-to-host) failed')
418 end if
419 else if (dir .eq. device_to_device) then
420 if (hipmemcpyasync(ptr_h, x_d, s, hipmemcpydevicetodevice, stream) &
421 .ne. hipsuccess) then
422 call neko_error('Device memcpy async (device-to-device) failed')
423 end if
424 else
425 call neko_error('Device memcpy failed (invalid direction')
426 end if
427 if (sync_device) then
428 call device_sync_stream(stream)
429 end if
430#elif HAVE_CUDA
431 if (dir .eq. host_to_device) then
432 if (cudamemcpyasync(x_d, ptr_h, s, cudamemcpyhosttodevice, stream) &
433 .ne. cudasuccess) then
434 call neko_error('Device memcpy async (host-to-device) failed')
435 end if
436 else if (dir .eq. device_to_host) then
437 if (cudamemcpyasync(ptr_h, x_d, s, cudamemcpydevicetohost, stream) &
438 .ne. cudasuccess) then
439 call neko_error('Device memcpy async (device-to-host) failed')
440 end if
441 else if (dir .eq. device_to_device) then
442 if (cudamemcpyasync(ptr_h, x_d, s, cudamemcpydevicetodevice, stream) &
443 .ne. cudasuccess) then
444 call neko_error('Device memcpy async (device-to-device) failed')
445 end if
446 else
447 call neko_error('Device memcpy failed (invalid direction')
448 end if
449 if (sync_device) then
450 call device_sync_stream(stream)
451 end if
452#elif HAVE_OPENCL
453 if (sync_device) then
454 if (dir .eq. host_to_device) then
455 if (clenqueuewritebuffer(stream, x_d, cl_true, 0_i8, s, &
456 ptr_h, 0, c_null_ptr, c_null_ptr) &
457 .ne. cl_success) then
458 call neko_error('Device memcpy (host-to-device) failed')
459 end if
460 else if (dir .eq. device_to_host) then
461 if (clenqueuereadbuffer(stream, x_d, cl_true, 0_i8, s, ptr_h, &
462 0, c_null_ptr, c_null_ptr) &
463 .ne. cl_success) then
464 call neko_error('Device memcpy (device-to-host) failed')
465 end if
466 else if (dir .eq. device_to_device) then
467 if (clenqueuecopybuffer(stream, x_d, ptr_h, 0_i8, 0_i8, s, &
468 0, c_null_ptr, c_null_ptr) &
469 .ne. cl_success) then
470 call neko_error('Device memcpy (device-to-device) failed')
471 end if
472 else
473 call neko_error('Device memcpy failed (invalid direction')
474 end if
475 else
476 if (dir .eq. host_to_device) then
477 if (clenqueuewritebuffer(stream, x_d, cl_false, 0_i8, s, &
478 ptr_h, 0, c_null_ptr, c_null_ptr) &
479 .ne. cl_success) then
480 call neko_error('Device memcpy (host-to-device) failed')
481 end if
482 else if (dir .eq. device_to_host) then
483 if (clenqueuereadbuffer(stream, x_d, cl_false, 0_i8, s, ptr_h,&
484 0, c_null_ptr, c_null_ptr) &
485 .ne. cl_success) then
486 call neko_error('Device memcpy (device-to-host) failed')
487 end if
488 else if (dir .eq. device_to_device) then
489 if (clenqueuecopybuffer(stream, x_d, ptr_h, 0_i8, 0_i8, s, &
490 0, c_null_ptr, c_null_ptr) &
491 .ne. cl_success) then
492 call neko_error('Device memcpy (device-to-device) failed')
493 end if
494 else
495 call neko_error('Device memcpy failed (invalid direction')
496 end if
497 end if
498#endif
499 end subroutine device_memcpy_common
500
502 subroutine device_associate_r1(x, x_d)
503 class(*), intent(inout), target :: x(:)
504 type(c_ptr), intent(inout) :: x_d
505 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
506
507 select type (x)
508 type is (integer)
509 htbl_ptr_h%ptr = c_loc(x)
510 type is (integer(i8))
511 htbl_ptr_h%ptr = c_loc(x)
512 type is (real)
513 htbl_ptr_h%ptr = c_loc(x)
514 type is (double precision)
515 htbl_ptr_h%ptr = c_loc(x)
516 class default
517 call neko_error('Unknown Fortran type')
518 end select
519
520 htbl_ptr_d%ptr = x_d
521
522 call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
523
524 end subroutine device_associate_r1
525
527 subroutine device_associate_r2(x, x_d)
528 class(*), intent(inout), target :: x(:,:)
529 type(c_ptr), intent(inout) :: x_d
530 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
531
532 select type (x)
533 type is (integer)
534 htbl_ptr_h%ptr = c_loc(x)
535 type is (integer(i8))
536 htbl_ptr_h%ptr = c_loc(x)
537 type is (real)
538 htbl_ptr_h%ptr = c_loc(x)
539 type is (double precision)
540 htbl_ptr_h%ptr = c_loc(x)
541 class default
542 call neko_error('Unknown Fortran type')
543 end select
544
545 htbl_ptr_d%ptr = x_d
546
547 call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
548
549 end subroutine device_associate_r2
550
552 subroutine device_associate_r3(x, x_d)
553 class(*), intent(inout), target :: x(:,:,:)
554 type(c_ptr), intent(inout) :: x_d
555 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
556
557 select type (x)
558 type is (integer)
559 htbl_ptr_h%ptr = c_loc(x)
560 type is (integer(i8))
561 htbl_ptr_h%ptr = c_loc(x)
562 type is (real)
563 htbl_ptr_h%ptr = c_loc(x)
564 type is (double precision)
565 htbl_ptr_h%ptr = c_loc(x)
566 class default
567 call neko_error('Unknown Fortran type')
568 end select
569
570 htbl_ptr_d%ptr = x_d
571
572 call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
573
574 end subroutine device_associate_r3
575
577 subroutine device_associate_r4(x, x_d)
578 class(*), intent(inout), target :: x(:,:,:,:)
579 type(c_ptr), intent(inout) :: x_d
580 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
581
582 select type (x)
583 type is (integer)
584 htbl_ptr_h%ptr = c_loc(x)
585 type is (integer(i8))
586 htbl_ptr_h%ptr = c_loc(x)
587 type is (real)
588 htbl_ptr_h%ptr = c_loc(x)
589 type is (double precision)
590 htbl_ptr_h%ptr = c_loc(x)
591 class default
592 call neko_error('Unknown Fortran type')
593 end select
594
595 htbl_ptr_d%ptr = x_d
596
597 call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
598
599 end subroutine device_associate_r4
600
603 class(*), intent(inout), target :: x(:)
604 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
605
606 select type (x)
607 type is (integer)
608 htbl_ptr_h%ptr = c_loc(x)
609 type is (integer(i8))
610 htbl_ptr_h%ptr = c_loc(x)
611 type is (real)
612 htbl_ptr_h%ptr = c_loc(x)
613 type is (double precision)
614 htbl_ptr_h%ptr = c_loc(x)
615 class default
616 call neko_error('Unknown Fortran type')
617 end select
618
619 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
620 call device_addrtbl%remove(htbl_ptr_h)
621 end if
622
623 end subroutine device_deassociate_r1
624
627 class(*), intent(inout), target :: x(:,:)
628 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
629
630 select type (x)
631 type is (integer)
632 htbl_ptr_h%ptr = c_loc(x)
633 type is (integer(i8))
634 htbl_ptr_h%ptr = c_loc(x)
635 type is (real)
636 htbl_ptr_h%ptr = c_loc(x)
637 type is (double precision)
638 htbl_ptr_h%ptr = c_loc(x)
639 class default
640 call neko_error('Unknown Fortran type')
641 end select
642
643 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
644 call device_addrtbl%remove(htbl_ptr_h)
645 end if
646
647 end subroutine device_deassociate_r2
648
651 class(*), intent(inout), target :: x(:,:,:)
652 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
653
654 select type (x)
655 type is (integer)
656 htbl_ptr_h%ptr = c_loc(x)
657 type is (integer(i8))
658 htbl_ptr_h%ptr = c_loc(x)
659 type is (real)
660 htbl_ptr_h%ptr = c_loc(x)
661 type is (double precision)
662 htbl_ptr_h%ptr = c_loc(x)
663 class default
664 call neko_error('Unknown Fortran type')
665 end select
666
667 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
668 call device_addrtbl%remove(htbl_ptr_h)
669 end if
670
671 end subroutine device_deassociate_r3
672
675 class(*), intent(inout), target :: x(:,:,:,:)
676 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
677
678 select type (x)
679 type is (integer)
680 htbl_ptr_h%ptr = c_loc(x)
681 type is (integer(i8))
682 htbl_ptr_h%ptr = c_loc(x)
683 type is (real)
684 htbl_ptr_h%ptr = c_loc(x)
685 type is (double precision)
686 htbl_ptr_h%ptr = c_loc(x)
687 class default
688 call neko_error('Unknown Fortran type')
689 end select
690
691 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
692 call device_addrtbl%remove(htbl_ptr_h)
693 end if
694
695 end subroutine device_deassociate_r4
696
698 subroutine device_map_r1(x, x_d, n)
699 integer, intent(in) :: n
700 class(*), intent(inout), target :: x(:)
701 type(c_ptr), intent(inout) :: x_d
702 integer(c_size_t) :: s
703
704 if (c_associated(x_d)) then
705 call neko_error('Device pointer already associated')
706 end if
707
708 select type (x)
709 type is (integer)
710 s = n * 4
711 type is (integer(i8))
712 s = n * 8
713 type is (real)
714 s = n * 4
715 type is (double precision)
716 s = n * 8
717 class default
718 call neko_error('Unknown Fortran type')
719 end select
720
721 call device_alloc(x_d, s)
722 call device_associate(x, x_d)
723
724 end subroutine device_map_r1
725
727 subroutine device_map_r2(x, x_d, n)
728 integer, intent(in) :: n
729 class(*), intent(inout), target :: x(:,:)
730 type(c_ptr), intent(inout) :: x_d
731 integer(c_size_t) :: s
732
733 if (c_associated(x_d)) then
734 call neko_error('Device pointer already associated')
735 end if
736
737 select type (x)
738 type is (integer)
739 s = n * 4
740 type is (integer(i8))
741 s = n * 8
742 type is (real)
743 s = n * 4
744 type is (double precision)
745 s = n * 8
746 class default
747 call neko_error('Unknown Fortran type')
748 end select
749
750 call device_alloc(x_d, s)
751 call device_associate(x, x_d)
752
753 end subroutine device_map_r2
754
756 subroutine device_map_r3(x, x_d, n)
757 integer, intent(in) :: n
758 class(*), intent(inout), target :: x(:,:,:)
759 type(c_ptr), intent(inout) :: x_d
760 integer(c_size_t) :: s
761
762 if (c_associated(x_d)) then
763 call neko_error('Device pointer already associated')
764 end if
765
766 select type (x)
767 type is (integer)
768 s = n * 4
769 type is (integer(i8))
770 s = n * 8
771 type is (real)
772 s = n * 4
773 type is (double precision)
774 s = n * 8
775 class default
776 call neko_error('Unknown Fortran type')
777 end select
778
779 call device_alloc(x_d, s)
780 call device_associate(x, x_d)
781
782 end subroutine device_map_r3
783
785 subroutine device_map_r4(x, x_d, n)
786 integer, intent(in) :: n
787 class(*), intent(inout), target :: x(:,:,:,:)
788 type(c_ptr), intent(inout) :: x_d
789 integer(c_size_t) :: s
790
791 if (c_associated(x_d)) then
792 call neko_error('Device pointer already associated')
793 end if
794
795 select type (x)
796 type is (integer)
797 s = n * 4
798 type is (integer(i8))
799 s = n * 8
800 type is (real)
801 s = n * 4
802 type is (double precision)
803 s = n * 8
804 class default
805 call neko_error('Unknown Fortran type')
806 end select
807
808 call device_alloc(x_d, s)
809 call device_associate(x, x_d)
810
811 end subroutine device_map_r4
812
814 function device_associated_r1(x) result(assoc)
815 class(*), intent(inout), target :: x(:)
816 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
817 logical :: assoc
818
819 select type (x)
820 type is (integer)
821 htbl_ptr_h%ptr = c_loc(x)
822 type is (integer(i8))
823 htbl_ptr_h%ptr = c_loc(x)
824 type is (real)
825 htbl_ptr_h%ptr = c_loc(x)
826 type is (double precision)
827 htbl_ptr_h%ptr = c_loc(x)
828 class default
829 call neko_error('Unknown Fortran type')
830 end select
831
832 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
833 assoc = .true.
834 else
835 assoc = .false.
836 end if
837
838 end function device_associated_r1
839
841 function device_associated_r2(x) result(assoc)
842 class(*), intent(inout), target :: x(:,:)
843 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
844 logical :: assoc
845
846 select type (x)
847 type is (integer)
848 htbl_ptr_h%ptr = c_loc(x)
849 type is (integer(i8))
850 htbl_ptr_h%ptr = c_loc(x)
851 type is (real)
852 htbl_ptr_h%ptr = c_loc(x)
853 type is (double precision)
854 htbl_ptr_h%ptr = c_loc(x)
855 class default
856 call neko_error('Unknown Fortran type')
857 end select
858
859 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
860 assoc = .true.
861 else
862 assoc = .false.
863 end if
864
865 end function device_associated_r2
866
868 function device_associated_r3(x) result(assoc)
869 class(*), intent(inout), target :: x(:,:,:)
870 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
871 logical :: assoc
872
873 select type (x)
874 type is (integer)
875 htbl_ptr_h%ptr = c_loc(x)
876 type is (integer(i8))
877 htbl_ptr_h%ptr = c_loc(x)
878 type is (real)
879 htbl_ptr_h%ptr = c_loc(x)
880 type is (double precision)
881 htbl_ptr_h%ptr = c_loc(x)
882 class default
883 call neko_error('Unknown Fortran type')
884 end select
885
886 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
887 assoc = .true.
888 else
889 assoc = .false.
890 end if
891
892 end function device_associated_r3
893
895 function device_associated_r4(x) result(assoc)
896 class(*), intent(inout), target :: x(:,:,:,:)
897 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
898 logical :: assoc
899
900 select type (x)
901 type is (integer)
902 htbl_ptr_h%ptr = c_loc(x)
903 type is (integer(i8))
904 htbl_ptr_h%ptr = c_loc(x)
905 type is (real)
906 htbl_ptr_h%ptr = c_loc(x)
907 type is (double precision)
908 htbl_ptr_h%ptr = c_loc(x)
909 class default
910 call neko_error('Unknown Fortran type')
911 end select
912
913 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
914 assoc = .true.
915 else
916 assoc = .false.
917 end if
918
919 end function device_associated_r4
920
923 class(*), intent(in), target :: x(:)
924 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
925 type(c_ptr) :: device_get_ptr_r1
926
927 device_get_ptr_r1 = c_null_ptr
928
929 select type (x)
930 type is (integer)
931 htbl_ptr_h%ptr = c_loc(x)
932 type is (integer(i8))
933 htbl_ptr_h%ptr = c_loc(x)
934 type is (real)
935 htbl_ptr_h%ptr = c_loc(x)
936 type is (double precision)
937 htbl_ptr_h%ptr = c_loc(x)
938 class default
939 call neko_error('Unknown Fortran type')
940 end select
941
942 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
943 device_get_ptr_r1 = htbl_ptr_d%ptr
944 else
945 call neko_error('Array not associated with device')
946 end if
947 end function device_get_ptr_r1
948
951 class(*), intent(in), target :: x(:,:)
952 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
953 type(c_ptr) :: device_get_ptr_r2
954
955 device_get_ptr_r2 = c_null_ptr
956
957 select type (x)
958 type is (integer)
959 htbl_ptr_h%ptr = c_loc(x)
960 type is (integer(i8))
961 htbl_ptr_h%ptr = c_loc(x)
962 type is (real)
963 htbl_ptr_h%ptr = c_loc(x)
964 type is (double precision)
965 htbl_ptr_h%ptr = c_loc(x)
966 class default
967 call neko_error('Unknown Fortran type')
968 end select
969
970 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
971 device_get_ptr_r2 = htbl_ptr_d%ptr
972 else
973 call neko_error('Array not associated with device')
974 end if
975 end function device_get_ptr_r2
976
979 class(*), intent(in), target :: x(:,:,:)
980 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
981 type(c_ptr) :: device_get_ptr_r3
982
983 device_get_ptr_r3 = c_null_ptr
984
985 select type (x)
986 type is (integer)
987 htbl_ptr_h%ptr = c_loc(x)
988 type is (integer(i8))
989 htbl_ptr_h%ptr = c_loc(x)
990 type is (real)
991 htbl_ptr_h%ptr = c_loc(x)
992 type is (double precision)
993 htbl_ptr_h%ptr = c_loc(x)
994 class default
995 call neko_error('Unknown Fortran type')
996 end select
997
998 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
999 device_get_ptr_r3 = htbl_ptr_d%ptr
1000 else
1001 call neko_error('Array not associated with device')
1002 end if
1003 end function device_get_ptr_r3
1004
1007 class(*), intent(in), target :: x(:,:,:,:)
1008 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
1009 type(c_ptr) :: device_get_ptr_r4
1010
1011 device_get_ptr_r4 = c_null_ptr
1012
1013 select type (x)
1014 type is (integer)
1015 htbl_ptr_h%ptr = c_loc(x)
1016 type is (integer(i8))
1017 htbl_ptr_h%ptr = c_loc(x)
1018 type is (real)
1019 htbl_ptr_h%ptr = c_loc(x)
1020 type is (double precision)
1021 htbl_ptr_h%ptr = c_loc(x)
1022 class default
1023 call neko_error('Unknown Fortran type')
1024 end select
1025
1026 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1027 device_get_ptr_r4 = htbl_ptr_d%ptr
1028 else
1029 call neko_error('Array not associated with device')
1030 end if
1031 end function device_get_ptr_r4
1032
1035#ifdef HAVE_HIP
1036 if (hipdevicesynchronize() .ne. hipsuccess) then
1037 call neko_error('Error during device sync')
1038 end if
1039#elif HAVE_CUDA
1040 if (cudadevicesynchronize() .ne. cudasuccess) then
1041 call neko_error('Error during device sync')
1042 end if
1043#elif HAVE_OPENCL
1044 if (clfinish(glb_cmd_queue) .ne. cl_success) then
1045 call neko_error('Error during device sync')
1046 end if
1047#endif
1048 end subroutine device_sync_device
1049
1051 subroutine device_sync_stream(stream)
1052 type(c_ptr), intent(in) :: stream
1053#ifdef HAVE_HIP
1054 if (hipstreamsynchronize(stream) .ne. hipsuccess) then
1055 call neko_error('Error during stream sync')
1056 end if
1057#elif HAVE_CUDA
1058 if (cudastreamsynchronize(stream) .ne. cudasuccess) then
1059 call neko_error('Error during stream sync')
1060 end if
1061#elif HAVE_OPENCL
1062 if (clfinish(stream) .ne. cl_success) then
1063 call neko_error('Error during stream sync')
1064 end if
1065#endif
1066 end subroutine device_sync_stream
1067
1069 subroutine device_stream_create(stream, flags)
1070 type(c_ptr), intent(inout) :: stream
1071 integer, optional :: flags
1072 integer :: ierr
1073#ifdef HAVE_HIP
1074 if (present(flags)) then
1075 if (hipstreamcreatewithflags(stream, flags) .ne. hipsuccess) then
1076 call neko_error('Error during stream create (w. flags)')
1077 end if
1078 else
1079 if (hipstreamcreate(stream) .ne. hipsuccess) then
1080 call neko_error('Error during stream create')
1081 end if
1082 end if
1083#elif HAVE_CUDA
1084 if (present(flags)) then
1085 if (cudastreamcreatewithflags(stream, flags) .ne. cudasuccess) then
1086 call neko_error('Error during stream create (w. flags)')
1087 end if
1088 else
1089 if (cudastreamcreate(stream) .ne. cudasuccess) then
1090 call neko_error('Error during stream create')
1091 end if
1092 end if
1093#elif HAVE_OPENCL
1094 stream = clcreatecommandqueue(glb_ctx, glb_device_id, 0_i8, ierr)
1095 if (ierr .ne. cl_success) then
1096 call neko_error('Error during stream create')
1097 end if
1098#endif
1099 end subroutine device_stream_create
1100
1102 subroutine device_stream_create_with_priority(stream, flags, prio)
1103 type(c_ptr), intent(inout) :: stream
1104 integer, intent(in) :: flags, prio
1105#ifdef HAVE_HIP
1106 if (hipstreamcreatewithpriority(stream, flags, prio) .ne. hipsuccess) then
1107 call neko_error('Error during stream create (w. priority)')
1108 end if
1109#elif HAVE_CUDA
1110 if (cudastreamcreatewithpriority(stream, flags, prio) .ne. cudasuccess) then
1111 call neko_error('Error during stream create (w. priority)')
1112 end if
1113#elif HAVE_OPENCL
1114 call neko_error('Not implemented yet')
1115#endif
1117
1119 subroutine device_stream_destroy(stream)
1120 type(c_ptr), intent(inout) :: stream
1121#ifdef HAVE_HIP
1122 if (hipstreamdestroy(stream) .ne. hipsuccess) then
1123 call neko_error('Error during stream destroy')
1124 end if
1125#elif HAVE_CUDA
1126 if (cudastreamdestroy(stream) .ne. cudasuccess) then
1127 call neko_error('Error during stream destroy')
1128 end if
1129#elif HAVE_OPENCL
1130 if (clreleasecommandqueue(stream) .ne. cl_success) then
1131 call neko_error('Error during stream destroy')
1132 end if
1133#endif
1134 end subroutine device_stream_destroy
1135
1137 subroutine device_stream_wait_event(stream, event, flags)
1138 type(c_ptr), intent(in) :: stream
1139 type(c_ptr), target, intent(in) :: event
1140 integer :: flags
1141#ifdef HAVE_HIP
1142 if (hipstreamwaitevent(stream, event, flags) .ne. hipsuccess) then
1143 call neko_error('Error during stream sync')
1144 end if
1145#elif HAVE_CUDA
1146 if (cudastreamwaitevent(stream, event, flags) .ne. cudasuccess) then
1147 call neko_error('Error during stream sync')
1148 end if
1149#elif HAVE_OPENCL
1150 if (clenqueuebarrier(stream) .ne. cl_success) then
1151 call neko_error('Error during barrier')
1152 end if
1153 if (clenqueuewaitforevents(stream, 1, c_loc(event)) .ne. cl_success) then
1154 call neko_error('Error during stream sync')
1155 end if
1156#endif
1157 end subroutine device_stream_wait_event
1158
1161#if HAVE_CUDA
1162 if (cudaprofilerstart() .ne. cudasuccess) then
1163 call neko_error('Error starting profiler')
1164 end if
1165#endif
1166 end subroutine device_profiler_start
1167
1170#if HAVE_CUDA
1171 if (cudaprofilerstop() .ne. cudasuccess) then
1172 call neko_error('Error stopping profiler')
1173 end if
1174#endif
1175 end subroutine device_profiler_stop
1176
1178 subroutine device_event_create(event, flags)
1179 type(c_ptr), intent(inout) :: event
1180 integer, optional :: flags
1181 integer :: ierr
1182#ifdef HAVE_HIP
1183 if (present(flags)) then
1184 if (hipeventcreatewithflags(event, flags) .ne. hipsuccess) then
1185 call neko_error('Error during event create (w. flags)')
1186 end if
1187 else
1188 if (hipeventcreate(event) .ne. hipsuccess) then
1189 call neko_error('Error during event create')
1190 end if
1191 end if
1192#elif HAVE_CUDA
1193 if (present(flags)) then
1194 if (cudaeventcreatewithflags(event, flags) .ne. cudasuccess) then
1195 call neko_error('Error during event create (w. flags)')
1196 end if
1197 else
1198 if (cudaeventcreate(event) .ne. cudasuccess) then
1199 call neko_error('Error during event create')
1200 end if
1201 end if
1202#elif HAVE_OPENCL
1203 event = c_null_ptr
1204#endif
1205 end subroutine device_event_create
1206
1208 subroutine device_event_destroy(event)
1209 type(c_ptr), intent(inout) :: event
1210#ifdef HAVE_HIP
1211 if (hipeventdestroy(event) .ne. hipsuccess) then
1212 call neko_error('Error during event destroy')
1213 end if
1214#elif HAVE_CUDA
1215 if (cudaeventdestroy(event) .ne. cudasuccess) then
1216 call neko_error('Error during event destroy')
1217 end if
1218#elif HAVE_OPENCL
1219 event = c_null_ptr
1220#endif
1221 end subroutine device_event_destroy
1222
1224 subroutine device_event_record(event, stream)
1225 type(c_ptr), target, intent(in) :: event
1226 type(c_ptr), intent(in) :: stream
1227#ifdef HAVE_HIP
1228 if (hipeventrecord(event, stream) .ne. hipsuccess) then
1229 call neko_error('Error recording an event')
1230 end if
1231#elif HAVE_CUDA
1232 if (cudaeventrecord(event, stream) .ne. cudasuccess) then
1233 call neko_error('Error recording an event')
1234 end if
1235#elif HAVE_OPENCL
1236 if (clenqueuemarker(stream, c_loc(event)) .ne. cl_success) then
1237 call neko_error('Error recording an event')
1238 end if
1239#endif
1240 end subroutine device_event_record
1241
1243 subroutine device_event_sync(event)
1244 type(c_ptr), target, intent(in) :: event
1245#ifdef HAVE_HIP
1246 if (hipeventsynchronize(event) .ne. hipsuccess) then
1247 call neko_error('Error during event sync')
1248 end if
1249#elif HAVE_CUDA
1250 if (cudaeventsynchronize(event) .ne. cudasuccess) then
1251 call neko_error('Error during event sync')
1252 end if
1253#elif HAVE_OPENCL
1254 if (c_associated(event)) then
1255 if (clwaitforevents(1, c_loc(event)) .ne. cl_success) then
1256 call neko_error('Error during event sync')
1257 end if
1258 end if
1259#endif
1260 end subroutine device_event_sync
1261
1262end module device
double real
Associate a Fortran array to a (allocated) device pointer.
Definition device.F90:77
Check if a Fortran array is assoicated with a device pointer.
Definition device.F90:83
Deassociate a Fortran array from a device pointer.
Definition device.F90:89
Return the device pointer for an associated Fortran array.
Definition device.F90:95
Map a Fortran array to a device (allocate and associate)
Definition device.F90:71
Copy data between host and device (or device and device)
Definition device.F90:65
Synchronize a device or stream.
Definition device.F90:101
Fortran CUDA interface.
Definition cuda_intf.F90:34
subroutine cuda_device_name(name)
subroutine cuda_finalize(glb_cmd_queue, aux_cmd_queue)
@ cudamemcpydevicetohost
Definition cuda_intf.F90:54
@ cudamemcpydevicetodevice
Definition cuda_intf.F90:55
@ cudamemcpyhosttodevice
Definition cuda_intf.F90:53
integer function cuda_device_count()
Return the number of avaialble CUDA devices.
subroutine cuda_init(glb_cmd_queue, aux_cmd_queue, strm_high_prio, strm_low_prio)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_record(event, stream)
Record a device event.
Definition device.F90:1225
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1244
subroutine device_associate_r2(x, x_d)
Associate a Fortran rank 2 array to a (allocated) device pointer.
Definition device.F90:528
subroutine, public device_finalize
Definition device.F90:137
integer, parameter, public device_to_device
Definition device.F90:46
type(c_ptr) function device_get_ptr_r4(x)
Return the device pointer for an associated Fortran rank 4 array.
Definition device.F90:1007
type(c_ptr) function device_get_ptr_r1(x)
Return the device pointer for an associated Fortran rank 1 array.
Definition device.F90:923
integer, public strm_low_prio
Low priority stream setting.
Definition device.F90:62
integer, parameter, public host_to_device
Definition device.F90:46
subroutine device_map_r3(x, x_d, n)
Map a Fortran rank 3 array to a device (allocate and associate)
Definition device.F90:757
subroutine, private device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
Copy data between host and device.
Definition device.F90:402
logical function device_associated_r3(x)
Check if a Fortran rank 3 array is assoicated with a device pointer.
Definition device.F90:869
type(htable_cptr_t), private device_addrtbl
Table of host to device address mappings.
Definition device.F90:106
subroutine, public device_profiler_stop()
Stop device profiling.
Definition device.F90:1170
subroutine device_deassociate_r3(x)
Deassociate a Fortran rank 3 array from a device pointer.
Definition device.F90:651
subroutine, public device_sync_stream(stream)
Synchronize a device stream.
Definition device.F90:1052
type(c_ptr) function device_get_ptr_r3(x)
Return the device pointer for an associated Fortran rank 3 array.
Definition device.F90:979
subroutine, public device_profiler_start()
Start device profiling.
Definition device.F90:1161
subroutine device_map_r2(x, x_d, n)
Map a Fortran rank 2 array to a device (allocate and associate)
Definition device.F90:728
subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 2 arrays)
Definition device.F90:257
subroutine device_map_r4(x, x_d, n)
Map a Fortran rank 4 array to a device (allocate and associate)
Definition device.F90:786
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:200
integer, parameter, public device_to_host
Definition device.F90:46
subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
Copy data between host and device (or device and device) (c-pointers)
Definition device.F90:373
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1209
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:179
subroutine, public device_stream_create_with_priority(stream, flags, prio)
Create a device stream/command queue with priority.
Definition device.F90:1103
subroutine, public device_stream_create(stream, flags)
Create a device stream/command queue.
Definition device.F90:1070
subroutine device_deassociate_r4(x)
Deassociate a Fortran rank 4 array from a device pointer.
Definition device.F90:675
subroutine device_sync_device()
Synchronize the device.
Definition device.F90:1035
subroutine device_associate_r1(x, x_d)
Associate a Fortran rank 1 array to a (allocated) device pointer.
Definition device.F90:503
subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 4 arrays)
Definition device.F90:333
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
Definition device.F90:1138
subroutine device_map_r1(x, x_d, n)
Map a Fortran rank 1 array to a device (allocate and associate)
Definition device.F90:699
subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 1 arrays)
Definition device.F90:219
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Definition device.F90:50
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1179
integer function, public device_count()
Return the number of available devices.
Definition device.F90:166
subroutine, public device_name(name)
Definition device.F90:153
logical function device_associated_r4(x)
Check if a Fortran rank 4 array is assoicated with a device pointer.
Definition device.F90:896
logical function device_associated_r2(x)
Check if a Fortran rank 2 array is assoicated with a device pointer.
Definition device.F90:842
integer, public strm_high_prio
High priority stream setting.
Definition device.F90:59
type(c_ptr), bind(C), public aux_cmd_queue
Aux command queue.
Definition device.F90:53
type(c_ptr) function device_get_ptr_r2(x)
Return the device pointer for an associated Fortran rank 2 array.
Definition device.F90:951
subroutine device_associate_r4(x, x_d)
Associate a Fortran rank 4 array to a (allocated) device pointer.
Definition device.F90:578
subroutine device_deassociate_r1(x)
Deassociate a Fortran rank 1 array from a device pointer.
Definition device.F90:603
type(c_ptr), bind(C), public glb_cmd_event
Event for the global command queue.
Definition device.F90:56
subroutine device_deassociate_r2(x)
Deassociate a Fortran rank 2 array from a device pointer.
Definition device.F90:627
subroutine, public device_init
Definition device.F90:122
subroutine device_associate_r3(x, x_d)
Associate a Fortran rank 3 array to a (allocated) device pointer.
Definition device.F90:553
logical function device_associated_r1(x)
Check if a Fortran rank 1 array is assoicated with a device pointer.
Definition device.F90:815
subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 3 arrays)
Definition device.F90:295
subroutine, public device_stream_destroy(stream)
Destroy a device stream/command queue.
Definition device.F90:1120
Fortran HIP interface.
Definition hip_intf.F90:34
subroutine hip_device_name(name)
Definition hip_intf.F90:262
@ hipmemcpydevicetohost
Definition hip_intf.F90:72
@ hipmemcpydevicetodevice
Definition hip_intf.F90:73
@ hipmemcpyhosttodevice
Definition hip_intf.F90:71
subroutine hip_init(glb_cmd_queue, aux_cmd_queue, strm_high_prio, strm_low_prio)
Definition hip_intf.F90:227
subroutine hip_finalize(glb_cmd_queue, aux_cmd_queue)
Definition hip_intf.F90:249
integer function hip_device_count()
Return the number of available HIP devices.
Definition hip_intf.F90:279
Implements a hash table ADT.
Definition htable.f90:36
integer, parameter, public i8
Definition num_types.f90:7
Fortran OpenCL interface.
subroutine opencl_device_name(name)
integer function opencl_device_count()
Return the number of OpenCL devices.
subroutine opencl_finalize(glb_cmd_queue, aux_cmd_queue)
subroutine opencl_init(glb_cmd_queue, aux_cmd_queue)
OpenCL JIT program library.
Definition prgm_lib.F90:2
subroutine, public opencl_prgm_lib_release
Definition prgm_lib.F90:97
Utilities.
Definition utils.f90:35
C pointer based hash table.
Definition htable.f90:142