59 real(kind=
rp),
allocatable :: local_gs(:)
60 integer,
allocatable :: local_dof_gs(:)
61 integer,
allocatable :: local_gs_dof(:)
62 integer,
allocatable :: local_blk_len(:)
63 real(kind=
rp),
allocatable :: shared_gs(:)
64 integer,
allocatable :: shared_dof_gs(:)
65 integer,
allocatable :: shared_gs_dof(:)
66 integer,
allocatable :: shared_blk_len(:)
71 integer :: nlocal_blks
72 integer :: nshared_blks
73 integer :: local_facet_offset
74 integer :: shared_facet_offset
92 class(
gs_t),
intent(inout) :: gs
93 type(
dofmap_t),
target,
intent(inout) :: dofmap
94 character(len=LOG_SIZE) :: log_buf
95 character(len=20) :: bcknd_str
96 integer,
optional :: bcknd
97 integer :: i, j, ierr, bcknd_
98 integer(i8) :: glb_nshared, glb_nlocal
99 logical :: use_device_mpi
100 real(kind=
rp),
allocatable :: tmp(:)
101 type(c_ptr) :: tmp_d = c_null_ptr
102 integer :: strtgy(4) = (/ int(b
'00'), int(b
'01'), int(b
'10'), int(b
'11') /)
103 integer :: avg_strtgy, env_len
104 character(len=255) :: env_strtgy
105 real(kind=
dp) :: strtgy_time(4)
109 call neko_log%section(
'Gather-Scatter')
118 if (use_device_mpi)
then
119 call neko_log%message(
'Comm : Device MPI')
126 call gs%comm%init_dofs()
131 glb_nlocal = int(gs%nlocal, i8)
132 glb_nshared = int(gs%nshared, i8)
135 call mpi_reduce(mpi_in_place, glb_nlocal, 1, &
136 mpi_integer8, mpi_sum, 0,
neko_comm, ierr)
138 call mpi_reduce(mpi_in_place, glb_nshared, 1, &
139 mpi_integer8, mpi_sum, 0,
neko_comm, ierr)
141 call mpi_reduce(glb_nlocal, glb_nlocal, 1, &
142 mpi_integer8, mpi_sum, 0,
neko_comm, ierr)
144 call mpi_reduce(glb_nshared, glb_nshared, 1, &
145 mpi_integer8, mpi_sum, 0,
neko_comm, ierr)
148 write(log_buf,
'(A,I12)')
'Avg. internal: ', glb_nlocal/
pe_size
150 write(log_buf,
'(A,I12)')
'Avg. external: ', glb_nshared/
pe_size
153 if (
present(bcknd))
then
177 bcknd_str =
' opencl'
183 call neko_error(
'Unknown Gather-scatter backend')
186 write(log_buf,
'(A)')
'Backend : ' // trim(bcknd_str)
191 call gs%bcknd%init(gs%nlocal, gs%nshared, gs%nlocal_blks, gs%nshared_blks)
193 if (use_device_mpi)
then
194 select type(b => gs%bcknd)
196 b%shared_on_host = .false.
201 select type(c => gs%comm)
203 call get_environment_variable(
"NEKO_GS_STRTGY", env_strtgy, env_len)
204 if (env_len .eq. 0)
then
205 allocate(tmp(
dofmap%size()))
212 do i = 1,
size(strtgy)
213 c%nb_strtgy = strtgy(i)
216 strtgy_time(i) = mpi_wtime()
220 strtgy_time(i) = (mpi_wtime() - strtgy_time(i)) / 100d0
227 c%nb_strtgy = strtgy(minloc(strtgy_time, 1))
229 avg_strtgy = minloc(strtgy_time, 1)
230 call mpi_allreduce(mpi_in_place, avg_strtgy, 1, &
232 avg_strtgy = avg_strtgy /
pe_size
234 write(log_buf,
'(A,B0.2,A)')
'Avg. strtgy : [', &
235 strtgy(avg_strtgy),
']'
238 read(env_strtgy(1:env_len), *) i
240 if (i .lt. 1 .or. i .gt. 4)
then
244 c%nb_strtgy = strtgy(i)
247 write(log_buf,
'(A,B0.2,A)')
'Env. strtgy : [', &
248 strtgy(avg_strtgy),
']'
263 class(
gs_t),
intent(inout) :: gs
267 if (
allocated(gs%local_gs))
then
268 deallocate(gs%local_gs)
271 if (
allocated(gs%local_dof_gs))
then
272 deallocate(gs%local_dof_gs)
275 if (
allocated(gs%local_gs_dof))
then
276 deallocate(gs%local_gs_dof)
279 if (
allocated(gs%local_blk_len))
then
280 deallocate(gs%local_blk_len)
283 if (
allocated(gs%shared_gs))
then
284 deallocate(gs%shared_gs)
287 if (
allocated(gs%shared_dof_gs))
then
288 deallocate(gs%shared_dof_gs)
291 if (
allocated(gs%shared_gs_dof))
then
292 deallocate(gs%shared_gs_dof)
295 if (
allocated(gs%shared_blk_len))
then
296 deallocate(gs%shared_blk_len)
304 call gs%shared_dofs%free()
306 if (
allocated(gs%bcknd))
then
311 if (
allocated(gs%comm))
then
320 type(
gs_t),
target,
intent(inout) :: gs
321 type(
mesh_t),
pointer :: msh
323 type(
stack_i4_t),
target :: local_dof, dof_local, shared_dof, dof_shared
324 type(
stack_i4_t),
target :: local_face_dof, face_dof_local
325 type(
stack_i4_t),
target :: shared_face_dof, face_dof_shared
326 integer :: i, j, k, l, lx, ly, lz, max_id, max_sid, id, lid, dm_size
332 sdm => gs%shared_dofs
337 dm_size =
dofmap%size()/lx
339 call dm%init(dm_size, i)
342 call sdm%init(
dofmap%size(), i)
345 call local_dof%init()
346 call dof_local%init()
348 call local_face_dof%init()
349 call face_dof_local%init()
351 call shared_dof%init()
352 call dof_shared%init()
354 call shared_face_dof%init()
355 call face_dof_shared%init()
365 if (
dofmap%shared_dof(1, 1, 1, i))
then
367 call shared_dof%push(id)
368 call dof_shared%push(lid)
371 call local_dof%push(id)
372 call dof_local%push(lid)
376 if (
dofmap%shared_dof(lx, 1, 1, i))
then
378 call shared_dof%push(id)
379 call dof_shared%push(lid)
382 call local_dof%push(id)
383 call dof_local%push(lid)
387 if (
dofmap%shared_dof(1, ly, 1, i))
then
389 call shared_dof%push(id)
390 call dof_shared%push(lid)
393 call local_dof%push(id)
394 call dof_local%push(lid)
398 if (
dofmap%shared_dof(lx, ly, 1, i))
then
400 call shared_dof%push(id)
401 call dof_shared%push(lid)
404 call local_dof%push(id)
405 call dof_local%push(lid)
409 if (
dofmap%shared_dof(1, 1, lz, i))
then
411 call shared_dof%push(id)
412 call dof_shared%push(lid)
415 call local_dof%push(id)
416 call dof_local%push(lid)
420 if (
dofmap%shared_dof(lx, 1, lz, i))
then
422 call shared_dof%push(id)
423 call dof_shared%push(lid)
426 call local_dof%push(id)
427 call dof_local%push(lid)
431 if (
dofmap%shared_dof(1, ly, lz, i))
then
433 call shared_dof%push(id)
434 call dof_shared%push(lid)
437 call local_dof%push(id)
438 call dof_local%push(lid)
442 if (
dofmap%shared_dof(lx, ly, lz, i))
then
444 call shared_dof%push(id)
445 call dof_shared%push(lid)
448 call local_dof%push(id)
449 call dof_local%push(lid)
462 if (
dofmap%shared_dof(2, 1, 1, i))
then
465 call shared_dof%push(id)
467 call dof_shared%push(id)
472 call local_dof%push(id)
474 call dof_local%push(id)
477 if (
dofmap%shared_dof(2, 1, lz, i))
then
480 call shared_dof%push(id)
482 call dof_shared%push(id)
487 call local_dof%push(id)
489 call dof_local%push(id)
493 if (
dofmap%shared_dof(2, ly, 1, i))
then
496 call shared_dof%push(id)
498 call dof_shared%push(id)
504 call local_dof%push(id)
506 call dof_local%push(id)
509 if (
dofmap%shared_dof(2, ly, lz, i))
then
512 call shared_dof%push(id)
514 call dof_shared%push(id)
519 call local_dof%push(id)
521 call dof_local%push(id)
528 if (
dofmap%shared_dof(1, 2, 1, i))
then
531 call shared_dof%push(id)
533 call dof_shared%push(id)
538 call local_dof%push(id)
540 call dof_local%push(id)
543 if (
dofmap%shared_dof(1, 2, lz, i))
then
546 call shared_dof%push(id)
548 call dof_shared%push(id)
553 call local_dof%push(id)
555 call dof_local%push(id)
559 if (
dofmap%shared_dof(lx, 2, 1, i))
then
562 call shared_dof%push(id)
564 call dof_shared%push(id)
569 call local_dof%push(id)
571 call dof_local%push(id)
574 if (
dofmap%shared_dof(lx, 2, lz, i))
then
577 call shared_dof%push(id)
579 call dof_shared%push(id)
584 call local_dof%push(id)
586 call dof_local%push(id)
592 if (
dofmap%shared_dof(1, 1, 2, i))
then
595 call shared_dof%push(id)
597 call dof_shared%push(id)
602 call local_dof%push(id)
604 call dof_local%push(id)
608 if (
dofmap%shared_dof(lx, 1, 2, i))
then
611 call shared_dof%push(id)
613 call dof_shared%push(id)
618 call local_dof%push(id)
620 call dof_local%push(id)
624 if (
dofmap%shared_dof(1, ly, 2, i))
then
627 call shared_dof%push(id)
629 call dof_shared%push(id)
634 call local_dof%push(id)
636 call dof_local%push(id)
640 if (
dofmap%shared_dof(lx, ly, 2, i))
then
643 call shared_dof%push(id)
645 call dof_shared%push(id)
650 call local_dof%push(id)
652 call dof_local%push(id)
666 if (msh%facet_neigh(3, i) .ne. 0)
then
667 if (
dofmap%shared_dof(2, 1, 1, i))
then
670 call shared_face_dof%push(id)
672 call face_dof_shared%push(id)
677 call local_face_dof%push(id)
679 call face_dof_local%push(id)
684 if (msh%facet_neigh(4, i) .ne. 0)
then
685 if (
dofmap%shared_dof(2, ly, 1, i))
then
688 call shared_face_dof%push(id)
690 call face_dof_shared%push(id)
696 call local_face_dof%push(id)
698 call face_dof_local%push(id)
706 if (msh%facet_neigh(1, i) .ne. 0)
then
707 if (
dofmap%shared_dof(1, 2, 1, i))
then
710 call shared_face_dof%push(id)
712 call face_dof_shared%push(id)
717 call local_face_dof%push(id)
719 call face_dof_local%push(id)
724 if (msh%facet_neigh(2, i) .ne. 0)
then
725 if (
dofmap%shared_dof(lx, 2, 1, i))
then
728 call shared_face_dof%push(id)
730 call face_dof_shared%push(id)
735 call local_face_dof%push(id)
737 call face_dof_local%push(id)
746 if (msh%facet_neigh(1, i) .ne. 0)
then
747 if (
dofmap%shared_dof(1, 2, 2, i))
then
751 call shared_face_dof%push(id)
753 call face_dof_shared%push(id)
760 call local_face_dof%push(id)
762 call face_dof_local%push(id)
768 if (msh%facet_neigh(2, i) .ne. 0)
then
769 if (
dofmap%shared_dof(lx, 2, 2, i))
then
773 call shared_face_dof%push(id)
775 call face_dof_shared%push(id)
782 call local_face_dof%push(id)
784 call face_dof_local%push(id)
791 if (msh%facet_neigh(3, i) .ne. 0)
then
792 if (
dofmap%shared_dof(2, 1, 2, i))
then
796 call shared_face_dof%push(id)
798 call face_dof_shared%push(id)
805 call local_face_dof%push(id)
807 call face_dof_local%push(id)
813 if (msh%facet_neigh(4, i) .ne. 0)
then
814 if (
dofmap%shared_dof(2, ly, 2, i))
then
818 call shared_face_dof%push(id)
820 call face_dof_shared%push(id)
827 call local_face_dof%push(id)
829 call face_dof_local%push(id)
836 if (msh%facet_neigh(5, i) .ne. 0)
then
837 if (
dofmap%shared_dof(2, 2, 1, i))
then
841 call shared_face_dof%push(id)
843 call face_dof_shared%push(id)
850 call local_face_dof%push(id)
852 call face_dof_local%push(id)
858 if (msh%facet_neigh(6, i) .ne. 0)
then
859 if (
dofmap%shared_dof(2, 2, lz, i))
then
863 call shared_face_dof%push(id)
865 call face_dof_shared%push(id)
872 call local_face_dof%push(id)
874 call face_dof_local%push(id)
885 gs%nlocal = local_dof%size() + local_face_dof%size()
886 gs%local_facet_offset = local_dof%size() + 1
889 allocate(gs%local_dof_gs(gs%nlocal))
896 select type(dof_array => local_dof%data)
900 gs%local_dof_gs(i) = dof_array(i)
903 call local_dof%free()
910 select type(dof_array => local_face_dof%data)
912 do i = 1, local_face_dof%size()
913 gs%local_dof_gs(i + j) = dof_array(i)
916 call local_face_dof%free()
919 allocate(gs%local_gs_dof(gs%nlocal))
926 select type(dof_array => dof_local%data)
930 gs%local_gs_dof(i) = dof_array(i)
933 call dof_local%free()
938 select type(dof_array => face_dof_local%data)
940 do i = 1, face_dof_local%size()
941 gs%local_gs_dof(i+j) = dof_array(i)
944 call face_dof_local%free()
947 gs%nlocal, 1, gs%nlocal)
950 gs%nlocal_blks, gs%nlocal, gs%local_facet_offset)
953 allocate(gs%local_gs(gs%nlocal))
955 gs%nshared = shared_dof%size() + shared_face_dof%size()
956 gs%shared_facet_offset = shared_dof%size() + 1
959 allocate(gs%shared_dof_gs(gs%nshared))
966 select type(dof_array => shared_dof%data)
968 j = shared_dof%size()
970 gs%shared_dof_gs(i) = dof_array(i)
973 call shared_dof%free()
980 select type(dof_array => shared_face_dof%data)
982 do i = 1, shared_face_dof%size()
983 gs%shared_dof_gs(i + j) = dof_array(i)
986 call shared_face_dof%free()
989 allocate(gs%shared_gs_dof(gs%nshared))
996 select type(dof_array => dof_shared%data)
998 j = dof_shared%size()
1000 gs%shared_gs_dof(i) = dof_array(i)
1003 call dof_shared%free()
1008 select type(dof_array => face_dof_shared%data)
1010 do i = 1, face_dof_shared%size()
1011 gs%shared_gs_dof(i + j) = dof_array(i)
1014 call face_dof_shared%free()
1017 allocate(gs%shared_gs(gs%nshared))
1019 if (gs%nshared .gt. 0)
then
1021 gs%nshared, 1, gs%nshared)
1023 call gs_find_blks(gs%shared_dof_gs, gs%shared_blk_len, &
1024 gs%nshared_blks, gs%nshared, gs%shared_facet_offset)
1032 integer(kind=i8),
intent(inout) :: dof
1033 integer,
intent(inout) :: max_id
1036 if (map_%get(dof, id) .gt. 0)
then
1038 call map_%set(dof, max_id)
1046 integer,
intent(inout) :: n
1047 integer,
dimension(n),
intent(inout) :: dg
1048 integer,
dimension(n),
intent(inout) :: gd
1050 integer :: tmp, i, j, pivot
1054 pivot = dg((lo + hi) / 2)
1058 if (dg(i) .ge. pivot)
exit
1063 if (dg(j) .le. pivot)
exit
1074 else if (i .eq. j)
then
1088 integer,
intent(in) :: n
1089 integer,
intent(in) :: m
1090 integer,
dimension(n),
intent(inout) :: dg
1091 integer,
allocatable,
intent(inout) :: blk_len(:)
1092 integer,
intent(inout) :: nblks
1094 integer :: id, count
1103 do while ( j+1 .le. n .and. dg(j+1) .eq. id)
1107 call blks%push(count)
1111 select type(blk_array => blks%data)
1114 allocate(blk_len(nblks))
1116 blk_len(i) = blk_array(i)
1127 type(
gs_t),
target,
intent(inout) :: gs
1128 integer(kind=i8),
allocatable :: send_buf(:), recv_buf(:)
1129 integer(kind=i2),
allocatable :: shared_flg(:), recv_flg(:)
1132 type(mpi_status) :: status
1133 type(mpi_request) :: send_req, recv_req
1134 integer :: i, j, max_recv, src, dst, ierr, n_recv
1135 integer :: tmp, shared_gs_id
1136 integer :: nshared_unique
1138 nshared_unique = gs%shared_dofs%num_entries()
1140 call it%init(gs%shared_dofs)
1141 allocate(send_buf(nshared_unique))
1144 send_buf(i) = it%key()
1156 call mpi_allreduce(nshared_unique, max_recv, 1, &
1159 allocate(recv_buf(max_recv))
1160 allocate(shared_flg(max_recv))
1161 allocate(recv_flg(max_recv))
1164 do i = 1,
size(gs%dofmap%msh%neigh_order)
1168 if (gs%dofmap%msh%neigh(src))
then
1169 call mpi_irecv(recv_buf, max_recv, mpi_integer8, &
1173 if (gs%dofmap%msh%neigh(dst))
then
1174 call mpi_isend(send_buf, nshared_unique, mpi_integer8, &
1178 if (gs%dofmap%msh%neigh(src))
then
1179 call mpi_wait(recv_req, status, ierr)
1180 call mpi_get_count(status, mpi_integer8, n_recv, ierr)
1183 shared_flg(j) = gs%shared_dofs%get(recv_buf(j), shared_gs_id)
1184 if (shared_flg(j) .eq. 0)
then
1186 call gs%comm%recv_dof(src)%push(shared_gs_id)
1190 if (gs%comm%recv_dof(src)%size() .gt. 0)
then
1191 call recv_pe%push(src)
1195 if (gs%dofmap%msh%neigh(dst))
then
1196 call mpi_wait(send_req, mpi_status_ignore, ierr)
1197 call mpi_irecv(recv_flg, max_recv, mpi_integer2, &
1201 if (gs%dofmap%msh%neigh(src))
then
1202 call mpi_isend(shared_flg, n_recv, mpi_integer2, &
1206 if (gs%dofmap%msh%neigh(dst))
then
1207 call mpi_wait(recv_req, status, ierr)
1208 call mpi_get_count(status, mpi_integer2, n_recv, ierr)
1211 if (recv_flg(j) .eq. 0)
then
1212 tmp = gs%shared_dofs%get(send_buf(j), shared_gs_id)
1214 call gs%comm%send_dof(dst)%push(shared_gs_id)
1218 if (gs%comm%send_dof(dst)%size() .gt. 0)
then
1219 call send_pe%push(dst)
1223 if (gs%dofmap%msh%neigh(src))
then
1224 call mpi_wait(send_req, mpi_status_ignore, ierr)
1229 call gs%comm%init(send_pe, recv_pe)
1234 deallocate(send_buf)
1235 deallocate(recv_flg)
1236 deallocate(shared_flg)
1238 call gs%shared_dofs%free()
1244 class(
gs_t),
intent(inout) :: gs
1245 type(
field_t),
intent(inout) :: u
1246 type(c_ptr),
optional,
intent(inout) :: event
1249 n = u%msh%nelv * u%Xh%lx * u%Xh%ly * u%Xh%lz
1250 if (
present(event))
then
1260 class(
gs_t),
intent(inout) :: gs
1261 integer,
intent(in) :: n
1262 real(kind=
rp),
contiguous,
dimension(:,:,:,:),
intent(inout) :: u
1263 type(c_ptr),
optional,
intent(inout) :: event
1266 if (
present(event))
then
1276 class(
gs_t),
intent(inout) :: gs
1277 integer,
intent(in) :: n
1278 real(kind=
rp),
dimension(n),
intent(inout) :: u
1279 type(c_ptr),
optional,
intent(inout) :: event
1280 integer :: m, l, op, lo, so
1282 lo = gs%local_facet_offset
1283 so = -gs%shared_facet_offset
1291 call gs%comm%nbrecv()
1294 call gs%bcknd%gather(gs%shared_gs, l, so, gs%shared_dof_gs, u, n, &
1295 gs%shared_gs_dof, gs%nshared_blks, gs%shared_blk_len, op, .true.)
1298 call gs%comm%nbsend(gs%shared_gs, l, &
1299 gs%bcknd%gather_event, gs%bcknd%gs_stream)
1306 call gs%bcknd%gather(gs%local_gs, m, lo, gs%local_dof_gs, u, n, &
1307 gs%local_gs_dof, gs%nlocal_blks, gs%local_blk_len, op, .false.)
1308 call gs%bcknd%scatter(gs%local_gs, m, gs%local_dof_gs, u, n, &
1309 gs%local_gs_dof, gs%nlocal_blks, gs%local_blk_len, .false., c_null_ptr)
1314 call gs%comm%nbwait(gs%shared_gs, l, op, gs%bcknd%gs_stream)
1317 if (
present(event))
then
1318 call gs%bcknd%scatter(gs%shared_gs, l,&
1319 gs%shared_dof_gs, u, n, &
1320 gs%shared_gs_dof, gs%nshared_blks, &
1321 gs%shared_blk_len, .true., event)
1323 call gs%bcknd%scatter(gs%shared_gs, l,&
1324 gs%shared_dof_gs, u, n, &
1325 gs%shared_gs_dof, gs%nshared_blks, &
1326 gs%shared_blk_len, .true., c_null_ptr)
subroutine gs_find_blks(dg, blk_len, nblks, n, m)
Find blocks sharing dofs in non-facet data.
recursive subroutine gs_qsort_dofmap(dg, gd, n, lo, hi)
Sort the dof lists based on the dof to gather-scatter list.
integer function gs_mapping_add_dof(map_, dof, max_id)
Register a unique dof.
Deassociate a Fortran array from a device pointer.
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
Synchronize a device or stream.
type(mpi_comm) neko_comm
MPI communicator.
integer pe_size
MPI size of communicator.
Device abstraction, common interface for various accelerators.
integer, parameter, public host_to_device
subroutine, public device_free(x_d)
Deallocate memory on the device.
Defines a mapping of the degrees of freedom.
subroutine gs_init_mapping(gs)
Setup mapping of dofs to gather-scatter operations.
subroutine gs_init(gs, dofmap, bcknd)
Initialize a gather-scatter kernel.
subroutine gs_schedule(gs)
Schedule shared gather-scatter operations.
subroutine gs_free(gs)
Deallocate a gather-scatter kernel.
subroutine gs_op_r4(gs, u, n, op, event)
Gather-scatter operation on a rank 4 array.
subroutine gs_op_vector(gs, u, n, op, event)
Gather-scatter operation on a vector u with op op.
subroutine gs_op_fld(gs, u, op, event)
Gather-scatter operation on a field u with op op.
Defines a gather-scatter backend.
integer, parameter, public gs_bcknd_cpu
integer, parameter, public gs_bcknd_sx
integer, parameter, public gs_bcknd_dev
Defines a gather-scatter communication method.
Generic Gather-scatter backend for CPUs.
Defines GPU aware MPI gather-scatter communication.
Generic Gather-scatter backend for accelerators.
Defines MPI gather-scatter communication.
Defines Gather-scatter operations.
integer, parameter, public gs_op_add
integer, parameter, public gs_op_max
integer, parameter, public gs_op_min
integer, parameter, public gs_op_mul
Generic Gather-scatter backend for NEC Vector Engines.
Implements a hash table ADT.
type(log_t), public neko_log
Global log stream.
integer, parameter, public log_size
integer, parameter neko_bcknd_sx
integer, parameter neko_bcknd_hip
integer, parameter neko_bcknd_device
integer, parameter neko_bcknd_opencl
logical, parameter neko_device_mpi
integer, parameter neko_bcknd_cuda
integer, parameter, public i2
integer, parameter, public dp
integer, parameter, public rp
Global precision used in computations.
subroutine, public profiler_start_region(name, region_id)
Started a named (name) profiler region.
subroutine, public profiler_end_region(name, region_id)
End the most recently started profiler region.
Implements a dynamic stack ADT.
pure integer function, public linear_index(i, j, k, l, lx, ly, lz)
Compute the address of a (i,j,k,l) array with sizes (1:lx, 1:ly, 1:lz, :)
Gather-scatter communication method.
Gather-scatter backend for CPUs.
Gather-scatter backend for offloading devices.
Gather-scatter communication using device MPI. The arrays are indexed per PE like send_pe and @ recv_...
Gather-scatter communication using MPI.
Gather-scatter backend for NEC SX-Aurora.
Integer*8 based hash table.
Iterator for an integer*8 based hash table.