40 use mpi_f08,
only : mpi_allreduce, mpi_alltoall, mpi_integer, mpi_max
48 use,
intrinsic :: iso_c_binding, only : c_ptr, c_null_ptr, c_loc, &
49 c_f_pointer, c_associated, c_sizeof, c_size_t, c_int64_t
56 integer,
allocatable :: ndofs(:)
58 integer,
allocatable :: offset(:)
61 integer,
allocatable :: remote_offset(:)
65 integer :: max_total = 0
67 type(c_ptr) :: buf_ptr = c_null_ptr
97 type(c_ptr) :: data_signals_ptr = c_null_ptr
101 type(c_ptr) :: ack_signals_ptr = c_null_ptr
106 integer(kind=i8) :: iter = 0
123 integer,
intent(in) :: pe_order(:)
124 type(
stack_i4_t),
intent(inout) :: dof_stack(0:)
125 integer :: i, ierr, n
126 integer(c_size_t) :: sz
127 real(c_rp) :: rp_dummy
128#ifndef HAVE_OPENSHMEM
129 call neko_error(
'Neko was not built with OpenSHMEM support')
134 allocate(this%ndofs(n))
135 allocate(this%offset(n))
136 allocate(this%remote_offset(n))
139 this%remote_offset(i) = -1
144 this%ndofs(i) = dof_stack(pe_order(i))%size()
145 this%offset(i) = this%total
146 this%total = this%total + this%ndofs(i)
151 call mpi_allreduce(this%total, this%max_total, 1, mpi_integer, mpi_max, &
154 sz = c_sizeof(rp_dummy) * int(
max(this%max_total, 1), c_size_t)
156 if (.not. c_associated(this%buf_ptr))
then
157 call neko_error(
'shmem_malloc failed for gs_shmem buffer')
166 if (
allocated(this%ndofs))
deallocate(this%ndofs)
167 if (
allocated(this%offset))
deallocate(this%offset)
168 if (
allocated(this%remote_offset))
deallocate(this%remote_offset)
171 if (c_associated(this%buf_ptr))
then
177 this%buf_ptr = c_null_ptr
189 integer(c_size_t) :: i64_size
190 integer(c_int64_t) :: i64_dummy
191 integer,
allocatable :: local_offsets(:), remote_offsets(:)
192#ifndef HAVE_OPENSHMEM
193 call neko_error(
'Neko was not built with OpenSHMEM support')
197 call this%init_order(send_pe, recv_pe)
199 call this%send_buf%init(this%send_pe, this%send_dof)
200 call this%recv_buf%init(this%recv_pe, this%recv_dof)
205 i64_size = c_sizeof(i64_dummy)
207 if (.not. c_associated(this%data_signals_ptr))
then
208 call neko_error(
'shmem_calloc failed for gs_shmem data signals')
211 if (.not. c_associated(this%ack_signals_ptr))
then
212 call neko_error(
'shmem_calloc failed for gs_shmem ack signals')
220 allocate(local_offsets(0:
pe_size - 1))
221 allocate(remote_offsets(0:
pe_size - 1))
223 do i = 1,
size(this%recv_pe)
224 local_offsets(this%recv_pe(i)) = this%recv_buf%offset(i)
226 call mpi_alltoall(local_offsets, 1, mpi_integer, &
227 remote_offsets, 1, mpi_integer,
neko_comm, ierr)
228 do i = 1,
size(this%send_pe)
229 this%send_buf%remote_offset(i) = remote_offsets(this%send_pe(i))
231 deallocate(local_offsets)
232 deallocate(remote_offsets)
251 if (c_associated(this%data_signals_ptr))
then
254 if (c_associated(this%ack_signals_ptr))
then
257 this%data_signals_ptr = c_null_ptr
258 this%ack_signals_ptr = c_null_ptr
262 call this%send_buf%free()
263 call this%recv_buf%free()
265 call this%free_order()
266 call this%free_dofs()
277 integer,
intent(in) :: n
278 real(kind=
rp),
dimension(n),
intent(inout) :: u
279 type(c_ptr),
intent(inout) :: deps
280 type(c_ptr),
intent(inout) :: strm
281 integer :: i, j, dst, base
282 integer(c_size_t) :: nbytes
283 integer ,
pointer :: sp(:)
284 real(kind=
rp),
pointer :: send_data(:), recv_data(:)
285 integer(c_int64_t),
pointer :: data_signals(:), ack_signals(:)
286 real(c_rp) :: rp_dummy
291 this%iter = this%iter + 1
293 call c_f_pointer(this%send_buf%buf_ptr, send_data, &
294 [
max(this%send_buf%max_total, 1)])
295 call c_f_pointer(this%recv_buf%buf_ptr, recv_data, &
296 [
max(this%recv_buf%max_total, 1)])
297 call c_f_pointer(this%data_signals_ptr, data_signals, [
pe_size])
298 call c_f_pointer(this%ack_signals_ptr, ack_signals, [
pe_size])
300 do i = 1,
size(this%send_pe)
301 dst = this%send_pe(i)
310 sp => this%send_dof(dst)%array()
311 base = this%send_buf%offset(i)
312 do concurrent(j = 1:this%send_dof(dst)%size())
313 send_data(base + j) = u(sp(j))
316 nbytes = int(this%send_buf%ndofs(i), c_size_t) * c_sizeof(rp_dummy)
319 c_loc(recv_data(this%send_buf%remote_offset(i) + 1)), &
320 c_loc(send_data(base + 1)), &
322 c_loc(data_signals(
pe_rank + 1)), &
339 integer,
intent(in) :: n
340 real(kind=
rp),
dimension(n),
intent(inout) :: u
341 type(c_ptr),
intent(inout) :: strm
343 integer :: i, j, src, base
344 integer(c_int64_t) :: dummy
345 integer ,
pointer ::
sp(:)
346 real(kind=
rp),
pointer :: recv_data(:)
347 integer(c_int64_t),
pointer :: data_signals(:), ack_signals(:)
350 call c_f_pointer(this%recv_buf%buf_ptr, recv_data, &
351 [
max(this%recv_buf%max_total, 1)])
352 call c_f_pointer(this%data_signals_ptr, data_signals, [
pe_size])
353 call c_f_pointer(this%ack_signals_ptr, ack_signals, [
pe_size])
355 do i = 1,
size(this%recv_pe)
356 src = this%recv_pe(i)
362 sp => this%recv_dof(src)%array()
363 base = this%recv_buf%offset(i)
367 do concurrent(j = 1:this%recv_dof(src)%size())
368 u(
sp(j)) = u(
sp(j)) + recv_data(base + j)
372 do concurrent(j = 1:this%recv_dof(src)%size())
373 u(
sp(j)) = u(
sp(j)) * recv_data(base + j)
377 do concurrent(j = 1:this%recv_dof(src)%size())
378 u(
sp(j)) = min(u(
sp(j)), recv_data(base + j))
382 do concurrent(j = 1:this%recv_dof(src)%size())
383 u(
sp(j)) =
max(u(
sp(j)), recv_data(base + j))
386 call neko_error(
"Unknown operation in gs_nbwait_shmem")
393 c_loc(ack_signals(
pe_rank + 1)), &
integer, public pe_size
MPI size of communicator.
integer, public pe_rank
MPI rank.
type(mpi_comm), public neko_comm
MPI communicator.
Defines a gather-scatter communication method.
Defines Gather-scatter operations.
integer, parameter, public gs_op_add
integer, parameter, public gs_op_max
integer, parameter, public gs_op_min
integer, parameter, public gs_op_mul
Defines OpenSHMEM gather-scatter communication.
subroutine gs_shmem_buf_init(this, pe_order, dof_stack)
Allocate symmetric memory and per-neighbor bookkeeping for one direction of communication.
subroutine gs_shmem_init(this, send_pe, recv_pe)
Initialise OpenSHMEM based communication method.
subroutine gs_shmem_buf_free(this)
Release symmetric memory and bookkeeping.
subroutine gs_shmem_nbrecv(this)
No-op: receives are completed via remote put-with-signal.
subroutine gs_shmem_free(this)
Deallocate OpenSHMEM based communication method.
subroutine gs_shmem_nbwait(this, u, n, op, strm)
Wait per-neighbor for the signal indicating that data has landed, apply the gather-scatter operation ...
subroutine gs_shmem_nbsend(this, u, n, deps, strm)
Pack the gathered shared dofs into the symmetric send buffer and issue non-blocking puts with signali...
integer, parameter, public i8
integer, parameter, public sp
integer, parameter, public c_rp
integer, parameter, public rp
Global precision used in computations.
Fortran bindings to SHMEM's C API.
Implements a dynamic stack ADT.
Gather-scatter communication method.
Symmetric buffer for one direction of OpenSHMEM communication.
Gather-scatter communication using OpenSHMEM one-sided puts with per-rank signaling for completion (O...