Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
device.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2023, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module device
35 use num_types, only : i8
36 use opencl_intf
37 use cuda_intf
38 use hip_intf
39 use htable, only : htable_cptr_t, h_cptr_t
40 use utils, only : neko_error
41 use dummy_device
43 use, intrinsic :: iso_c_binding
44 implicit none
45! private
46
47 integer, public, parameter :: host_to_device = 1, device_to_host = 2, &
49
54 end interface device_memcpy
55
57 interface device_map
58 module procedure device_map_r1, device_map_r2, &
60 end interface device_map
61
66 end interface device_associate
67
72 end interface device_associated
73
78 end interface device_deassociate
79
84 end interface device_get_ptr
85
87 interface device_sync
89 end interface device_sync
90
92 type(htable_cptr_t), private :: device_addrtbl
93
101
102 private :: device_memcpy_common
103
104contains
105
106 subroutine device_init
107#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
108 call device_addrtbl%init(64)
109
110#ifdef HAVE_HIP
111 call hip_init
112#elif HAVE_CUDA
113 call cuda_init
114#elif HAVE_OPENCL
115 call opencl_init
116#endif
117
118#endif
119 end subroutine device_init
120
122#if defined(HAVE_HIP) || defined(HAVE_CUDA) || defined(HAVE_OPENCL)
123 call device_addrtbl%free()
124
125#ifdef HAVE_HIP
126 call hip_finalize
127#elif HAVE_CUDA
128 call cuda_finalize
129#elif HAVE_OPENCL
131 call opencl_finalize
132#endif
133
134#endif
135 end subroutine device_finalize
136
137 subroutine device_name(name)
138 character(len=*), intent(inout) :: name
139
140#ifdef HAVE_HIP
141 call hip_device_name(name)
142#elif HAVE_CUDA
143 call cuda_device_name(name)
144#elif HAVE_OPENCL
145 call opencl_device_name(name)
146#endif
147 end subroutine device_name
148
150 integer function device_count()
151#ifdef HAVE_HIP
153#elif HAVE_CUDA
155#elif HAVE_OPENCL
157#else
158 device_count = 0
159#endif
160 end function device_count
161
163 subroutine device_alloc(x_d, s)
164 type(c_ptr), intent(inout) :: x_d
165 integer(c_size_t) :: s
166 integer :: ierr
167#ifdef HAVE_HIP
168 if (hipmalloc(x_d, s) .ne. hipsuccess) then
169 call neko_error('Memory allocation on device failed')
170 end if
171#elif HAVE_CUDA
172 if (cudamalloc(x_d, s) .ne. cudasuccess) then
173 call neko_error('Memory allocation on device failed')
174 end if
175#elif HAVE_OPENCL
176 x_d = clcreatebuffer(glb_ctx, cl_mem_read_write, s, c_null_ptr, ierr)
177 if (ierr .ne. cl_success) then
178 call neko_error('Memory allocation on device failed')
179 end if
180#endif
181 end subroutine device_alloc
182
184 subroutine device_free(x_d)
185 type(c_ptr), intent(inout) :: x_d
186#ifdef HAVE_HIP
187 if (hipfree(x_d) .ne. hipsuccess) then
188 call neko_error('Memory deallocation on device failed')
189 end if
190#elif HAVE_CUDA
191 if (cudafree(x_d) .ne. cudasuccess) then
192 call neko_error('Memory deallocation on device failed')
193 end if
194#elif HAVE_OPENCL
195 if (clreleasememobject(x_d) .ne. cl_success) then
196 call neko_error('Memory deallocation on device failed')
197 end if
198#endif
199 x_d = c_null_ptr
200 end subroutine device_free
201
203 subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
204 integer, intent(in) :: n
205 class(*), intent(inout), target :: x(:)
206 type(c_ptr), intent(inout) :: x_d
207 integer, intent(in), value :: dir
208 logical :: sync
209 type(c_ptr), optional :: strm
210 type(c_ptr) :: ptr_h, copy_stream
211 integer(c_size_t) :: s
212
213 if (present(strm)) then
214 copy_stream = strm
215 else
216 copy_stream = glb_cmd_queue
217 end if
218
219 select type (x)
220 type is (integer)
221 s = n * 4
222 ptr_h = c_loc(x)
223 type is (integer(i8))
224 s = n * 8
225 ptr_h = c_loc(x)
226 type is (real)
227 s = n * 4
228 ptr_h = c_loc(x)
229 type is (double precision)
230 s = n * 8
231 ptr_h = c_loc(x)
232 class default
233 call neko_error('Unknown Fortran type')
234 end select
235
236 call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
237
238 end subroutine device_memcpy_r1
239
241 subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
242 integer, intent(in) :: n
243 class(*), intent(inout), target :: x(:,:)
244 type(c_ptr), intent(inout) :: x_d
245 integer, intent(in), value :: dir
246 logical :: sync
247 type(c_ptr), optional :: strm
248 type(c_ptr) :: ptr_h, copy_stream
249 integer(c_size_t) :: s
250
251 if (present(strm)) then
252 copy_stream = strm
253 else
254 copy_stream = glb_cmd_queue
255 end if
256
257 select type (x)
258 type is (integer)
259 s = n * 4
260 ptr_h = c_loc(x)
261 type is (integer(i8))
262 s = n * 8
263 ptr_h = c_loc(x)
264 type is (real)
265 s = n * 4
266 ptr_h = c_loc(x)
267 type is (double precision)
268 s = n * 8
269 ptr_h = c_loc(x)
270 class default
271 call neko_error('Unknown Fortran type')
272 end select
273
274 call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
275
276 end subroutine device_memcpy_r2
277
279 subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
280 integer, intent(in) :: n
281 class(*), intent(inout), target :: x(:,:,:)
282 type(c_ptr), intent(inout) :: x_d
283 integer, intent(in), value :: dir
284 logical :: sync
285 type(c_ptr), optional :: strm
286 type(c_ptr) :: ptr_h, copy_stream
287 integer(c_size_t) :: s
288
289 if (present(strm)) then
290 copy_stream = strm
291 else
292 copy_stream = glb_cmd_queue
293 end if
294
295 select type (x)
296 type is (integer)
297 s = n * 4
298 ptr_h = c_loc(x)
299 type is (integer(i8))
300 s = n * 8
301 ptr_h = c_loc(x)
302 type is (real)
303 s = n * 4
304 ptr_h = c_loc(x)
305 type is (double precision)
306 s = n * 8
307 ptr_h = c_loc(x)
308 class default
309 call neko_error('Unknown Fortran type')
310 end select
311
312 call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
313
314 end subroutine device_memcpy_r3
315
317 subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
318 integer, intent(in) :: n
319 class(*), intent(inout), target :: x(:,:,:,:)
320 type(c_ptr), intent(inout) :: x_d
321 integer, intent(in), value :: dir
322 logical :: sync
323 type(c_ptr), optional :: strm
324 type(c_ptr) :: ptr_h, copy_stream
325 integer(c_size_t) :: s
326
327 if (present(strm)) then
328 copy_stream = strm
329 else
330 copy_stream = glb_cmd_queue
331 end if
332
333 select type (x)
334 type is (integer)
335 s = n * 4
336 ptr_h = c_loc(x)
337 type is (integer(i8))
338 s = n * 8
339 ptr_h = c_loc(x)
340 type is (real)
341 s = n * 4
342 ptr_h = c_loc(x)
343 type is (double precision)
344 s = n * 8
345 ptr_h = c_loc(x)
346 class default
347 call neko_error('Unknown Fortran type')
348 end select
349
350 call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
351
352 end subroutine device_memcpy_r4
353
357 subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
358 type(c_ptr), intent(inout) :: dst
359 type(c_ptr), intent(inout) :: src
360 integer(c_size_t), intent(in) :: s
361 integer, intent(in), value :: dir
362 logical, optional :: sync
363 type(c_ptr), optional :: strm
364 type(c_ptr) :: copy_stream
365 logical :: sync_device
366
367 if (present(sync)) then
368 sync_device = sync
369 else
370 sync_device = .false.
371 end if
372
373 if (present(strm)) then
374 copy_stream = strm
375 else
376 copy_stream = glb_cmd_queue
377 end if
378
379 call device_memcpy_common(dst, src, s, dir, sync_device, copy_stream)
380
381 end subroutine device_memcpy_cptr
382
386 subroutine device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
387 type(c_ptr), intent(inout) :: ptr_h
388 type(c_ptr), intent(inout) :: x_d
389 integer(c_size_t), intent(in) :: s
390 integer, intent(in), value :: dir
391 logical, intent(in) :: sync_device
392 type(c_ptr), intent(inout) :: stream
393#ifdef HAVE_HIP
394 if (dir .eq. host_to_device) then
395 if (hipmemcpyasync(x_d, ptr_h, s, &
396 hipmemcpyhosttodevice, stream) .ne. hipsuccess) then
397 call neko_error('Device memcpy async (host-to-device) failed')
398 end if
399 else if (dir .eq. device_to_host) then
400 if (hipmemcpyasync(ptr_h, x_d, s, &
401 hipmemcpydevicetohost, stream) .ne. hipsuccess) then
402 call neko_error('Device memcpy async (device-to-host) failed')
403 end if
404 else if (dir .eq. device_to_device) then
405 if (hipmemcpyasync(ptr_h, x_d, s, hipmemcpydevicetodevice, stream) &
406 .ne. hipsuccess) then
407 call neko_error('Device memcpy async (device-to-device) failed')
408 end if
409 else
410 call neko_error('Device memcpy failed (invalid direction')
411 end if
412 if (sync_device) then
413 call device_sync_stream(stream)
414 end if
415#elif HAVE_CUDA
416 if (dir .eq. host_to_device) then
417 if (cudamemcpyasync(x_d, ptr_h, s, cudamemcpyhosttodevice, stream) &
418 .ne. cudasuccess) then
419 call neko_error('Device memcpy async (host-to-device) failed')
420 end if
421 else if (dir .eq. device_to_host) then
422 if (cudamemcpyasync(ptr_h, x_d, s, cudamemcpydevicetohost, stream) &
423 .ne. cudasuccess) then
424 call neko_error('Device memcpy async (device-to-host) failed')
425 end if
426 else if (dir .eq. device_to_device) then
427 if (cudamemcpyasync(ptr_h, x_d, s, cudamemcpydevicetodevice, stream) &
428 .ne. cudasuccess) then
429 call neko_error('Device memcpy async (device-to-device) failed')
430 end if
431 else
432 call neko_error('Device memcpy failed (invalid direction')
433 end if
434 if (sync_device) then
435 call device_sync_stream(stream)
436 end if
437#elif HAVE_OPENCL
438 if (sync_device) then
439 if (dir .eq. host_to_device) then
440 if (clenqueuewritebuffer(stream, x_d, cl_true, 0_i8, s, &
441 ptr_h, 0, c_null_ptr, c_null_ptr) &
442 .ne. cl_success) then
443 call neko_error('Device memcpy (host-to-device) failed')
444 end if
445 else if (dir .eq. device_to_host) then
446 if (clenqueuereadbuffer(stream, x_d, cl_true, 0_i8, s, ptr_h, &
447 0, c_null_ptr, c_null_ptr) &
448 .ne. cl_success) then
449 call neko_error('Device memcpy (device-to-host) failed')
450 end if
451 else if (dir .eq. device_to_device) then
452 if (clenqueuecopybuffer(stream, x_d, ptr_h, 0_i8, 0_i8, s, &
453 0, c_null_ptr, c_null_ptr) &
454 .ne. cl_success) then
455 call neko_error('Device memcpy (device-to-device) failed')
456 end if
457 else
458 call neko_error('Device memcpy failed (invalid direction')
459 end if
460 else
461 if (dir .eq. host_to_device) then
462 if (clenqueuewritebuffer(stream, x_d, cl_false, 0_i8, s, &
463 ptr_h, 0, c_null_ptr, c_null_ptr) &
464 .ne. cl_success) then
465 call neko_error('Device memcpy (host-to-device) failed')
466 end if
467 else if (dir .eq. device_to_host) then
468 if (clenqueuereadbuffer(stream, x_d, cl_false, 0_i8, s, ptr_h,&
469 0, c_null_ptr, c_null_ptr) &
470 .ne. cl_success) then
471 call neko_error('Device memcpy (device-to-host) failed')
472 end if
473 else if (dir .eq. device_to_device) then
474 if (clenqueuecopybuffer(stream, x_d, ptr_h, 0_i8, 0_i8, s, &
475 0, c_null_ptr, c_null_ptr) &
476 .ne. cl_success) then
477 call neko_error('Device memcpy (device-to-device) failed')
478 end if
479 else
480 call neko_error('Device memcpy failed (invalid direction')
481 end if
482 end if
483#endif
484 end subroutine device_memcpy_common
485
487 subroutine device_associate_r1(x, x_d)
488 class(*), intent(inout), target :: x(:)
489 type(c_ptr), intent(inout) :: x_d
490 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
491
492 select type (x)
493 type is (integer)
494 htbl_ptr_h%ptr = c_loc(x)
495 type is (integer(i8))
496 htbl_ptr_h%ptr = c_loc(x)
497 type is (real)
498 htbl_ptr_h%ptr = c_loc(x)
499 type is (double precision)
500 htbl_ptr_h%ptr = c_loc(x)
501 class default
502 call neko_error('Unknown Fortran type')
503 end select
504
505 htbl_ptr_d%ptr = x_d
506
507 call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
508
509 end subroutine device_associate_r1
510
512 subroutine device_associate_r2(x, x_d)
513 class(*), intent(inout), target :: x(:,:)
514 type(c_ptr), intent(inout) :: x_d
515 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
516
517 select type (x)
518 type is (integer)
519 htbl_ptr_h%ptr = c_loc(x)
520 type is (integer(i8))
521 htbl_ptr_h%ptr = c_loc(x)
522 type is (real)
523 htbl_ptr_h%ptr = c_loc(x)
524 type is (double precision)
525 htbl_ptr_h%ptr = c_loc(x)
526 class default
527 call neko_error('Unknown Fortran type')
528 end select
529
530 htbl_ptr_d%ptr = x_d
531
532 call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
533
534 end subroutine device_associate_r2
535
537 subroutine device_associate_r3(x, x_d)
538 class(*), intent(inout), target :: x(:,:,:)
539 type(c_ptr), intent(inout) :: x_d
540 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
541
542 select type (x)
543 type is (integer)
544 htbl_ptr_h%ptr = c_loc(x)
545 type is (integer(i8))
546 htbl_ptr_h%ptr = c_loc(x)
547 type is (real)
548 htbl_ptr_h%ptr = c_loc(x)
549 type is (double precision)
550 htbl_ptr_h%ptr = c_loc(x)
551 class default
552 call neko_error('Unknown Fortran type')
553 end select
554
555 htbl_ptr_d%ptr = x_d
556
557 call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
558
559 end subroutine device_associate_r3
560
562 subroutine device_associate_r4(x, x_d)
563 class(*), intent(inout), target :: x(:,:,:,:)
564 type(c_ptr), intent(inout) :: x_d
565 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
566
567 select type (x)
568 type is (integer)
569 htbl_ptr_h%ptr = c_loc(x)
570 type is (integer(i8))
571 htbl_ptr_h%ptr = c_loc(x)
572 type is (real)
573 htbl_ptr_h%ptr = c_loc(x)
574 type is (double precision)
575 htbl_ptr_h%ptr = c_loc(x)
576 class default
577 call neko_error('Unknown Fortran type')
578 end select
579
580 htbl_ptr_d%ptr = x_d
581
582 call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
583
584 end subroutine device_associate_r4
585
588 class(*), intent(inout), target :: x(:)
589 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
590
591 select type (x)
592 type is (integer)
593 htbl_ptr_h%ptr = c_loc(x)
594 type is (integer(i8))
595 htbl_ptr_h%ptr = c_loc(x)
596 type is (real)
597 htbl_ptr_h%ptr = c_loc(x)
598 type is (double precision)
599 htbl_ptr_h%ptr = c_loc(x)
600 class default
601 call neko_error('Unknown Fortran type')
602 end select
603
604 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
605 call device_addrtbl%remove(htbl_ptr_h)
606 end if
607
608 end subroutine device_deassociate_r1
609
612 class(*), intent(inout), target :: x(:,:)
613 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
614
615 select type (x)
616 type is (integer)
617 htbl_ptr_h%ptr = c_loc(x)
618 type is (integer(i8))
619 htbl_ptr_h%ptr = c_loc(x)
620 type is (real)
621 htbl_ptr_h%ptr = c_loc(x)
622 type is (double precision)
623 htbl_ptr_h%ptr = c_loc(x)
624 class default
625 call neko_error('Unknown Fortran type')
626 end select
627
628 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
629 call device_addrtbl%remove(htbl_ptr_h)
630 end if
631
632 end subroutine device_deassociate_r2
633
636 class(*), intent(inout), target :: x(:,:,:)
637 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
638
639 select type (x)
640 type is (integer)
641 htbl_ptr_h%ptr = c_loc(x)
642 type is (integer(i8))
643 htbl_ptr_h%ptr = c_loc(x)
644 type is (real)
645 htbl_ptr_h%ptr = c_loc(x)
646 type is (double precision)
647 htbl_ptr_h%ptr = c_loc(x)
648 class default
649 call neko_error('Unknown Fortran type')
650 end select
651
652 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
653 call device_addrtbl%remove(htbl_ptr_h)
654 end if
655
656 end subroutine device_deassociate_r3
657
660 class(*), intent(inout), target :: x(:,:,:,:)
661 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
662
663 select type (x)
664 type is (integer)
665 htbl_ptr_h%ptr = c_loc(x)
666 type is (integer(i8))
667 htbl_ptr_h%ptr = c_loc(x)
668 type is (real)
669 htbl_ptr_h%ptr = c_loc(x)
670 type is (double precision)
671 htbl_ptr_h%ptr = c_loc(x)
672 class default
673 call neko_error('Unknown Fortran type')
674 end select
675
676 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
677 call device_addrtbl%remove(htbl_ptr_h)
678 end if
679
680 end subroutine device_deassociate_r4
681
683 subroutine device_map_r1(x, x_d, n)
684 integer, intent(in) :: n
685 class(*), intent(inout), target :: x(:)
686 type(c_ptr), intent(inout) :: x_d
687 integer(c_size_t) :: s
688
689 if (c_associated(x_d)) then
690 call neko_error('Device pointer already associated')
691 end if
692
693 select type (x)
694 type is (integer)
695 s = n * 4
696 type is (integer(i8))
697 s = n * 8
698 type is (real)
699 s = n * 4
700 type is (double precision)
701 s = n * 8
702 class default
703 call neko_error('Unknown Fortran type')
704 end select
705
706 call device_alloc(x_d, s)
707 call device_associate(x, x_d)
708
709 end subroutine device_map_r1
710
712 subroutine device_map_r2(x, x_d, n)
713 integer, intent(in) :: n
714 class(*), intent(inout), target :: x(:,:)
715 type(c_ptr), intent(inout) :: x_d
716 integer(c_size_t) :: s
717
718 if (c_associated(x_d)) then
719 call neko_error('Device pointer already associated')
720 end if
721
722 select type (x)
723 type is (integer)
724 s = n * 4
725 type is (integer(i8))
726 s = n * 8
727 type is (real)
728 s = n * 4
729 type is (double precision)
730 s = n * 8
731 class default
732 call neko_error('Unknown Fortran type')
733 end select
734
735 call device_alloc(x_d, s)
736 call device_associate(x, x_d)
737
738 end subroutine device_map_r2
739
741 subroutine device_map_r3(x, x_d, n)
742 integer, intent(in) :: n
743 class(*), intent(inout), target :: x(:,:,:)
744 type(c_ptr), intent(inout) :: x_d
745 integer(c_size_t) :: s
746
747 if (c_associated(x_d)) then
748 call neko_error('Device pointer already associated')
749 end if
750
751 select type (x)
752 type is (integer)
753 s = n * 4
754 type is (integer(i8))
755 s = n * 8
756 type is (real)
757 s = n * 4
758 type is (double precision)
759 s = n * 8
760 class default
761 call neko_error('Unknown Fortran type')
762 end select
763
764 call device_alloc(x_d, s)
765 call device_associate(x, x_d)
766
767 end subroutine device_map_r3
768
770 subroutine device_map_r4(x, x_d, n)
771 integer, intent(in) :: n
772 class(*), intent(inout), target :: x(:,:,:,:)
773 type(c_ptr), intent(inout) :: x_d
774 integer(c_size_t) :: s
775
776 if (c_associated(x_d)) then
777 call neko_error('Device pointer already associated')
778 end if
779
780 select type (x)
781 type is (integer)
782 s = n * 4
783 type is (integer(i8))
784 s = n * 8
785 type is (real)
786 s = n * 4
787 type is (double precision)
788 s = n * 8
789 class default
790 call neko_error('Unknown Fortran type')
791 end select
792
793 call device_alloc(x_d, s)
794 call device_associate(x, x_d)
795
796 end subroutine device_map_r4
797
799 function device_associated_r1(x) result(assoc)
800 class(*), intent(inout), target :: x(:)
801 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
802 logical :: assoc
803
804 select type (x)
805 type is (integer)
806 htbl_ptr_h%ptr = c_loc(x)
807 type is (integer(i8))
808 htbl_ptr_h%ptr = c_loc(x)
809 type is (real)
810 htbl_ptr_h%ptr = c_loc(x)
811 type is (double precision)
812 htbl_ptr_h%ptr = c_loc(x)
813 class default
814 call neko_error('Unknown Fortran type')
815 end select
816
817 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
818 assoc = .true.
819 else
820 assoc = .false.
821 end if
822
823 end function device_associated_r1
824
826 function device_associated_r2(x) result(assoc)
827 class(*), intent(inout), target :: x(:,:)
828 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
829 logical :: assoc
830
831 select type (x)
832 type is (integer)
833 htbl_ptr_h%ptr = c_loc(x)
834 type is (integer(i8))
835 htbl_ptr_h%ptr = c_loc(x)
836 type is (real)
837 htbl_ptr_h%ptr = c_loc(x)
838 type is (double precision)
839 htbl_ptr_h%ptr = c_loc(x)
840 class default
841 call neko_error('Unknown Fortran type')
842 end select
843
844 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
845 assoc = .true.
846 else
847 assoc = .false.
848 end if
849
850 end function device_associated_r2
851
853 function device_associated_r3(x) result(assoc)
854 class(*), intent(inout), target :: x(:,:,:)
855 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
856 logical :: assoc
857
858 select type (x)
859 type is (integer)
860 htbl_ptr_h%ptr = c_loc(x)
861 type is (integer(i8))
862 htbl_ptr_h%ptr = c_loc(x)
863 type is (real)
864 htbl_ptr_h%ptr = c_loc(x)
865 type is (double precision)
866 htbl_ptr_h%ptr = c_loc(x)
867 class default
868 call neko_error('Unknown Fortran type')
869 end select
870
871 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
872 assoc = .true.
873 else
874 assoc = .false.
875 end if
876
877 end function device_associated_r3
878
880 function device_associated_r4(x) result(assoc)
881 class(*), intent(inout), target :: x(:,:,:,:)
882 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
883 logical :: assoc
884
885 select type (x)
886 type is (integer)
887 htbl_ptr_h%ptr = c_loc(x)
888 type is (integer(i8))
889 htbl_ptr_h%ptr = c_loc(x)
890 type is (real)
891 htbl_ptr_h%ptr = c_loc(x)
892 type is (double precision)
893 htbl_ptr_h%ptr = c_loc(x)
894 class default
895 call neko_error('Unknown Fortran type')
896 end select
897
898 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
899 assoc = .true.
900 else
901 assoc = .false.
902 end if
903
904 end function device_associated_r4
905
908 class(*), intent(in), target :: x(:)
909 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
910 type(c_ptr) :: device_get_ptr_r1
911
912 device_get_ptr_r1 = c_null_ptr
913
914 select type (x)
915 type is (integer)
916 htbl_ptr_h%ptr = c_loc(x)
917 type is (integer(i8))
918 htbl_ptr_h%ptr = c_loc(x)
919 type is (real)
920 htbl_ptr_h%ptr = c_loc(x)
921 type is (double precision)
922 htbl_ptr_h%ptr = c_loc(x)
923 class default
924 call neko_error('Unknown Fortran type')
925 end select
926
927 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
928 device_get_ptr_r1 = htbl_ptr_d%ptr
929 else
930 call neko_error('Array not associated with device')
931 end if
932 end function device_get_ptr_r1
933
936 class(*), intent(in), target :: x(:,:)
937 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
938 type(c_ptr) :: device_get_ptr_r2
939
940 device_get_ptr_r2 = c_null_ptr
941
942 select type (x)
943 type is (integer)
944 htbl_ptr_h%ptr = c_loc(x)
945 type is (integer(i8))
946 htbl_ptr_h%ptr = c_loc(x)
947 type is (real)
948 htbl_ptr_h%ptr = c_loc(x)
949 type is (double precision)
950 htbl_ptr_h%ptr = c_loc(x)
951 class default
952 call neko_error('Unknown Fortran type')
953 end select
954
955 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
956 device_get_ptr_r2 = htbl_ptr_d%ptr
957 else
958 call neko_error('Array not associated with device')
959 end if
960 end function device_get_ptr_r2
961
964 class(*), intent(in), target :: x(:,:,:)
965 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
966 type(c_ptr) :: device_get_ptr_r3
967
968 device_get_ptr_r3 = c_null_ptr
969
970 select type (x)
971 type is (integer)
972 htbl_ptr_h%ptr = c_loc(x)
973 type is (integer(i8))
974 htbl_ptr_h%ptr = c_loc(x)
975 type is (real)
976 htbl_ptr_h%ptr = c_loc(x)
977 type is (double precision)
978 htbl_ptr_h%ptr = c_loc(x)
979 class default
980 call neko_error('Unknown Fortran type')
981 end select
982
983 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
984 device_get_ptr_r3 = htbl_ptr_d%ptr
985 else
986 call neko_error('Array not associated with device')
987 end if
988 end function device_get_ptr_r3
989
992 class(*), intent(in), target :: x(:,:,:,:)
993 type(h_cptr_t) :: htbl_ptr_h, htbl_ptr_d
994 type(c_ptr) :: device_get_ptr_r4
995
996 device_get_ptr_r4 = c_null_ptr
997
998 select type (x)
999 type is (integer)
1000 htbl_ptr_h%ptr = c_loc(x)
1001 type is (integer(i8))
1002 htbl_ptr_h%ptr = c_loc(x)
1003 type is (real)
1004 htbl_ptr_h%ptr = c_loc(x)
1005 type is (double precision)
1006 htbl_ptr_h%ptr = c_loc(x)
1007 class default
1008 call neko_error('Unknown Fortran type')
1009 end select
1010
1011 if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
1012 device_get_ptr_r4 = htbl_ptr_d%ptr
1013 else
1014 call neko_error('Array not associated with device')
1015 end if
1016 end function device_get_ptr_r4
1017
1020#ifdef HAVE_HIP
1021 if (hipdevicesynchronize() .ne. hipsuccess) then
1022 call neko_error('Error during device sync')
1023 end if
1024#elif HAVE_CUDA
1025 if (cudadevicesynchronize() .ne. cudasuccess) then
1026 call neko_error('Error during device sync')
1027 end if
1028#elif HAVE_OPENCL
1029 if (clfinish(glb_cmd_queue) .ne. cl_success) then
1030 call neko_error('Error during device sync')
1031 end if
1032#endif
1033 end subroutine device_sync_device
1034
1036 subroutine device_sync_stream(stream)
1037 type(c_ptr), intent(in) :: stream
1038#ifdef HAVE_HIP
1039 if (hipstreamsynchronize(stream) .ne. hipsuccess) then
1040 call neko_error('Error during stream sync')
1041 end if
1042#elif HAVE_CUDA
1043 if (cudastreamsynchronize(stream) .ne. cudasuccess) then
1044 call neko_error('Error during stream sync')
1045 end if
1046#elif HAVE_OPENCL
1047 if (clfinish(stream) .ne. cl_success) then
1048 call neko_error('Error during stream sync')
1049 end if
1050#endif
1051 end subroutine device_sync_stream
1052
1054 subroutine device_stream_create(stream, flags)
1055 type(c_ptr), intent(inout) :: stream
1056 integer, optional :: flags
1057 integer :: ierr
1058#ifdef HAVE_HIP
1059 if (present(flags)) then
1060 if (hipstreamcreatewithflags(stream, flags) .ne. hipsuccess) then
1061 call neko_error('Error during stream create (w. flags)')
1062 end if
1063 else
1064 if (hipstreamcreate(stream) .ne. hipsuccess) then
1065 call neko_error('Error during stream create')
1066 end if
1067 end if
1068#elif HAVE_CUDA
1069 if (present(flags)) then
1070 if (cudastreamcreatewithflags(stream, flags) .ne. cudasuccess) then
1071 call neko_error('Error during stream create (w. flags)')
1072 end if
1073 else
1074 if (cudastreamcreate(stream) .ne. cudasuccess) then
1075 call neko_error('Error during stream create')
1076 end if
1077 end if
1078#elif HAVE_OPENCL
1079 stream = clcreatecommandqueue(glb_ctx, glb_device_id, 0_i8, ierr)
1080 if (ierr .ne. cl_success) then
1081 call neko_error('Error during stream create')
1082 end if
1083#endif
1084 end subroutine device_stream_create
1085
1087 subroutine device_stream_create_with_priority(stream, flags, prio)
1088 type(c_ptr), intent(inout) :: stream
1089 integer, intent(in) :: flags, prio
1090#ifdef HAVE_HIP
1091 if (hipstreamcreatewithpriority(stream, flags, prio) .ne. hipsuccess) then
1092 call neko_error('Error during stream create (w. priority)')
1093 end if
1094#elif HAVE_CUDA
1095 if (cudastreamcreatewithpriority(stream, flags, prio) .ne. cudasuccess) then
1096 call neko_error('Error during stream create (w. priority)')
1097 end if
1098#elif HAVE_OPENCL
1099 call neko_error('Not implemented yet')
1100#endif
1102
1104 subroutine device_stream_destroy(stream)
1105 type(c_ptr), intent(inout) :: stream
1106#ifdef HAVE_HIP
1107 if (hipstreamdestroy(stream) .ne. hipsuccess) then
1108 call neko_error('Error during stream destroy')
1109 end if
1110#elif HAVE_CUDA
1111 if (cudastreamdestroy(stream) .ne. cudasuccess) then
1112 call neko_error('Error during stream destroy')
1113 end if
1114#elif HAVE_OPENCL
1115 if (clreleasecommandqueue(stream) .ne. cl_success) then
1116 call neko_error('Error during stream destroy')
1117 end if
1118#endif
1119 end subroutine device_stream_destroy
1120
1122 subroutine device_stream_wait_event(stream, event, flags)
1123 type(c_ptr), intent(in) :: stream
1124 type(c_ptr), target, intent(in) :: event
1125 integer :: flags
1126#ifdef HAVE_HIP
1127 if (hipstreamwaitevent(stream, event, flags) .ne. hipsuccess) then
1128 call neko_error('Error during stream sync')
1129 end if
1130#elif HAVE_CUDA
1131 if (cudastreamwaitevent(stream, event, flags) .ne. cudasuccess) then
1132 call neko_error('Error during stream sync')
1133 end if
1134#elif HAVE_OPENCL
1135 if (clenqueuebarrier(stream) .ne. cl_success) then
1136 call neko_error('Error during barrier')
1137 end if
1138 if (clenqueuewaitforevents(stream, 1, c_loc(event)) .ne. cl_success) then
1139 call neko_error('Error during stream sync')
1140 end if
1141#endif
1142 end subroutine device_stream_wait_event
1143
1146#if HAVE_CUDA
1147 if (cudaprofilerstart() .ne. cudasuccess) then
1148 call neko_error('Error starting profiler')
1149 end if
1150#endif
1151 end subroutine device_profiler_start
1152
1155#if HAVE_CUDA
1156 if (cudaprofilerstop() .ne. cudasuccess) then
1157 call neko_error('Error stopping profiler')
1158 end if
1159#endif
1160 end subroutine device_profiler_stop
1161
1163 subroutine device_event_create(event, flags)
1164 type(c_ptr), intent(inout) :: event
1165 integer, optional :: flags
1166 integer :: ierr
1167#ifdef HAVE_HIP
1168 if (present(flags)) then
1169 if (hipeventcreatewithflags(event, flags) .ne. hipsuccess) then
1170 call neko_error('Error during event create (w. flags)')
1171 end if
1172 else
1173 if (hipeventcreate(event) .ne. hipsuccess) then
1174 call neko_error('Error during event create')
1175 end if
1176 end if
1177#elif HAVE_CUDA
1178 if (present(flags)) then
1179 if (cudaeventcreatewithflags(event, flags) .ne. cudasuccess) then
1180 call neko_error('Error during event create (w. flags)')
1181 end if
1182 else
1183 if (cudaeventcreate(event) .ne. cudasuccess) then
1184 call neko_error('Error during event create')
1185 end if
1186 end if
1187#elif HAVE_OPENCL
1188 event = c_null_ptr
1189#endif
1190 end subroutine device_event_create
1191
1193 subroutine device_event_destroy(event)
1194 type(c_ptr), intent(inout) :: event
1195#ifdef HAVE_HIP
1196 if (hipeventdestroy(event) .ne. hipsuccess) then
1197 call neko_error('Error during event destroy')
1198 end if
1199#elif HAVE_CUDA
1200 if (cudaeventdestroy(event) .ne. cudasuccess) then
1201 call neko_error('Error during event destroy')
1202 end if
1203#elif HAVE_OPENCL
1204 event = c_null_ptr
1205#endif
1206 end subroutine device_event_destroy
1207
1209 subroutine device_event_record(event, stream)
1210 type(c_ptr), target, intent(in) :: event
1211 type(c_ptr), intent(in) :: stream
1212#ifdef HAVE_HIP
1213 if (hipeventrecord(event, stream) .ne. hipsuccess) then
1214 call neko_error('Error recording an event')
1215 end if
1216#elif HAVE_CUDA
1217 if (cudaeventrecord(event, stream) .ne. cudasuccess) then
1218 call neko_error('Error recording an event')
1219 end if
1220#elif HAVE_OPENCL
1221 if (clenqueuemarker(stream, c_loc(event)) .ne. cl_success) then
1222 call neko_error('Error recording an event')
1223 end if
1224#endif
1225 end subroutine device_event_record
1226
1228 subroutine device_event_sync(event)
1229 type(c_ptr), target, intent(in) :: event
1230#ifdef HAVE_HIP
1231 if (hipeventsynchronize(event) .ne. hipsuccess) then
1232 call neko_error('Error during event sync')
1233 end if
1234#elif HAVE_CUDA
1235 if (cudaeventsynchronize(event) .ne. cudasuccess) then
1236 call neko_error('Error during event sync')
1237 end if
1238#elif HAVE_OPENCL
1239 if (c_associated(event)) then
1240 if (clwaitforevents(1, c_loc(event)) .ne. cl_success) then
1241 call neko_error('Error during event sync')
1242 end if
1243 end if
1244#endif
1245 end subroutine device_event_sync
1246
1247end module device
double real
Associate a Fortran array to a (allocated) device pointer.
Definition device.F90:63
Check if a Fortran array is assoicated with a device pointer.
Definition device.F90:69
Deassociate a Fortran array from a device pointer.
Definition device.F90:75
Return the device pointer for an associated Fortran array.
Definition device.F90:81
Map a Fortran array to a device (allocate and associate)
Definition device.F90:57
Copy data between host and device (or device and device)
Definition device.F90:51
Synchronize a device or stream.
Definition device.F90:87
Fortran CUDA interface.
Definition cuda_intf.F90:34
subroutine cuda_device_name(name)
subroutine cuda_finalize
@ cudamemcpydevicetohost
Definition cuda_intf.F90:66
@ cudamemcpydevicetodevice
Definition cuda_intf.F90:67
@ cudamemcpyhosttodevice
Definition cuda_intf.F90:65
subroutine cuda_init
integer function cuda_device_count()
Return the number of avaialble CUDA devices.
type(c_ptr), bind(C) glb_cmd_queue
Global HIP command queue.
Definition cuda_intf.F90:43
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_record(event, stream)
Record a device event.
Definition device.F90:1210
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1229
subroutine device_associate_r2(x, x_d)
Associate a Fortran rank 2 array to a (allocated) device pointer.
Definition device.F90:513
subroutine, public device_finalize
Definition device.F90:122
integer, parameter, public device_to_device
Definition device.F90:47
type(c_ptr) function device_get_ptr_r4(x)
Return the device pointer for an associated Fortran rank 4 array.
Definition device.F90:992
type(c_ptr) function device_get_ptr_r1(x)
Return the device pointer for an associated Fortran rank 1 array.
Definition device.F90:908
integer, parameter, public host_to_device
Definition device.F90:47
subroutine device_map_r3(x, x_d, n)
Map a Fortran rank 3 array to a device (allocate and associate)
Definition device.F90:742
subroutine, private device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
Copy data between host and device.
Definition device.F90:387
logical function device_associated_r3(x)
Check if a Fortran rank 3 array is assoicated with a device pointer.
Definition device.F90:854
type(htable_cptr_t), private device_addrtbl
Table of host to device address mappings.
Definition device.F90:92
subroutine, public device_profiler_stop()
Stop device profiling.
Definition device.F90:1155
subroutine device_deassociate_r3(x)
Deassociate a Fortran rank 3 array from a device pointer.
Definition device.F90:636
subroutine, public device_sync_stream(stream)
Synchronize a device stream.
Definition device.F90:1037
type(c_ptr) function device_get_ptr_r3(x)
Return the device pointer for an associated Fortran rank 3 array.
Definition device.F90:964
subroutine, public device_profiler_start()
Start device profiling.
Definition device.F90:1146
subroutine device_map_r2(x, x_d, n)
Map a Fortran rank 2 array to a device (allocate and associate)
Definition device.F90:713
subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 2 arrays)
Definition device.F90:242
subroutine device_map_r4(x, x_d, n)
Map a Fortran rank 4 array to a device (allocate and associate)
Definition device.F90:771
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:185
integer, parameter, public device_to_host
Definition device.F90:47
subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
Copy data between host and device (or device and device) (c-pointers)
Definition device.F90:358
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1194
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:164
subroutine, public device_stream_create(stream, flags)
Create a device stream/command queue.
Definition device.F90:1055
subroutine device_deassociate_r4(x)
Deassociate a Fortran rank 4 array from a device pointer.
Definition device.F90:660
subroutine device_sync_device()
Synchronize the device.
Definition device.F90:1020
subroutine device_associate_r1(x, x_d)
Associate a Fortran rank 1 array to a (allocated) device pointer.
Definition device.F90:488
subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 4 arrays)
Definition device.F90:318
subroutine, public device_stream_wait_event(stream, event, flags)
Synchronize a device stream with an event.
Definition device.F90:1123
subroutine device_map_r1(x, x_d, n)
Map a Fortran rank 1 array to a device (allocate and associate)
Definition device.F90:684
subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 1 arrays)
Definition device.F90:204
subroutine device_stream_create_with_priority(stream, flags, prio)
Create a device stream/command queue with priority.
Definition device.F90:1088
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1164
subroutine, public device_name(name)
Definition device.F90:138
logical function device_associated_r4(x)
Check if a Fortran rank 4 array is assoicated with a device pointer.
Definition device.F90:881
integer function device_count()
Return the number of available devices.
Definition device.F90:151
logical function device_associated_r2(x)
Check if a Fortran rank 2 array is assoicated with a device pointer.
Definition device.F90:827
type(c_ptr) function device_get_ptr_r2(x)
Return the device pointer for an associated Fortran rank 2 array.
Definition device.F90:936
subroutine device_associate_r4(x, x_d)
Associate a Fortran rank 4 array to a (allocated) device pointer.
Definition device.F90:563
subroutine device_deassociate_r1(x)
Deassociate a Fortran rank 1 array from a device pointer.
Definition device.F90:588
subroutine device_deassociate_r2(x)
Deassociate a Fortran rank 2 array from a device pointer.
Definition device.F90:612
subroutine, public device_init
Definition device.F90:107
subroutine device_associate_r3(x, x_d)
Associate a Fortran rank 3 array to a (allocated) device pointer.
Definition device.F90:538
logical function device_associated_r1(x)
Check if a Fortran rank 1 array is assoicated with a device pointer.
Definition device.F90:800
subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
Copy data between host and device (rank 3 arrays)
Definition device.F90:280
subroutine, public device_stream_destroy(stream)
Destroy a device stream/command queue.
Definition device.F90:1105
Dummy device interface.
Fortran HIP interface.
Definition hip_intf.F90:34
subroutine hip_device_name(name)
Definition hip_intf.F90:266
subroutine hip_init
Definition hip_intf.F90:238
@ hipmemcpydevicetohost
Definition hip_intf.F90:84
@ hipmemcpydevicetodevice
Definition hip_intf.F90:85
@ hipmemcpyhosttodevice
Definition hip_intf.F90:83
subroutine hip_finalize
Definition hip_intf.F90:256
integer function hip_device_count()
Return the number of available HIP devices.
Definition hip_intf.F90:283
Implements a hash table ADT.
Definition htable.f90:36
integer, parameter, public i8
Definition num_types.f90:7
Fortran OpenCL interface.
subroutine opencl_device_name(name)
integer function opencl_device_count()
Return the number of OpenCL devices.
subroutine opencl_init
subroutine opencl_finalize
OpenCL JIT program library.
Definition prgm_lib.F90:2
subroutine opencl_prgm_lib_release
Definition prgm_lib.F90:91
Utilities.
Definition utils.f90:35
C pointer based hash table.
Definition htable.f90:142