40 use,
intrinsic :: iso_c_binding
41#ifdef HAVE_COARRAY_EVENTS
42 use,
intrinsic :: iso_fortran_env, only : atomic_int_kind, event_type
44 use,
intrinsic :: iso_fortran_env, only : atomic_int_kind
78 real(kind=
rp),
allocatable :: gs_caf_recv_buf(:)[:]
79 integer :: gs_caf_buf_size = 0
84 integer :: gs_caf_mode = 0
91 integer(kind=atomic_int_kind),
allocatable :: gs_caf_data_ready(:)[:]
92 integer(kind=atomic_int_kind),
allocatable :: gs_caf_buf_ready(:)[:]
101 integer,
allocatable :: gs_caf_send_count(:)
102 integer,
allocatable :: gs_caf_recv_count(:)
104#ifdef HAVE_COARRAY_EVENTS
116 type(event_type),
allocatable :: gs_caf_data_ready_ev[:]
117 type(event_type),
allocatable :: gs_caf_buf_ready_ev[:]
118 logical :: gs_caf_event_in_use = .false.
131 real(kind=
rp),
allocatable :: send_buf(:)
133 integer,
allocatable :: send_len(:), recv_len(:)
135 integer,
allocatable :: send_offset(:), recv_offset(:)
137 integer,
allocatable :: dest_offset(:)
139 integer,
allocatable :: send_img(:), recv_img(:)
143 integer,
allocatable :: sync_img(:)
146 logical :: send_started = .false.
150 integer :: parity = 0
163 class(
gs_caf_t),
intent(inout) :: this
167 integer,
allocatable :: dest_xchg(:)[:]
168 logical,
allocatable :: in_neigh(:)
169 integer :: i, nsend, nrecv, send_total, recv_total, max_total, n_neigh
170 integer :: me, env_len
171 character(len=64) :: env_val
174 if (gs_caf_mode .eq. 0)
then
175 call get_environment_variable(
"NEKO_GS_CAF_SIGNALING", env_val, env_len)
176 if (env_len .gt. 0 .and. env_val(1:env_len) .eq.
"atomic")
then
178 allocate(gs_caf_data_ready(0:
pe_size - 1)[*])
179 allocate(gs_caf_buf_ready(0:
pe_size - 1)[*])
180 allocate(gs_caf_send_count(0:
pe_size - 1))
181 allocate(gs_caf_recv_count(0:
pe_size - 1))
182 gs_caf_send_count = 0
183 gs_caf_recv_count = 0
188 call atomic_define(gs_caf_data_ready(i), 0_atomic_int_kind)
189 call atomic_define(gs_caf_buf_ready(i), 0_atomic_int_kind)
191 else if (env_len .gt. 0 .and. env_val(1:env_len) .eq.
"event")
then
192#ifdef HAVE_COARRAY_EVENTS
195 call neko_error(
"NEKO_GS_CAF_SIGNALING=event requires a Fortran " // &
196 "compiler with coarray events support")
203#ifdef HAVE_COARRAY_EVENTS
209 if (.not.
allocated(gs_caf_data_ready_ev))
then
210 allocate(gs_caf_data_ready_ev[*])
211 allocate(gs_caf_buf_ready_ev[*])
216 call this%init_order(send_pe, recv_pe)
218 nsend =
size(this%send_pe)
219 nrecv =
size(this%recv_pe)
221 allocate(this%send_len(nsend), this%send_offset(nsend), &
222 this%send_img(nsend), this%dest_offset(nsend))
223 allocate(this%recv_len(nrecv), this%recv_offset(nrecv), &
224 this%recv_img(nrecv))
229 this%recv_len(i) = this%recv_dof(this%recv_pe(i))%size()
230 this%recv_offset(i) = recv_total
231 recv_total = recv_total + this%recv_len(i)
232 this%recv_img(i) = this%recv_pe(i) + 1
238 this%send_len(i) = this%send_dof(this%send_pe(i))%size()
239 this%send_offset(i) = send_total
240 send_total = send_total + this%send_len(i)
241 this%send_img(i) = this%send_pe(i) + 1
243 allocate(this%send_buf(
max(1, send_total)))
250 max_total = recv_total
251 call co_max(max_total)
252 max_total =
max(1, max_total)
253 if (max_total .gt. gs_caf_buf_size)
then
254 if (
allocated(gs_caf_recv_buf))
deallocate(gs_caf_recv_buf)
255 allocate(gs_caf_recv_buf(2 * max_total)[*])
256 gs_caf_buf_size = max_total
265 allocate(dest_xchg(0:
pe_size - 1)[*])
267 dest_xchg(me - 1)[this%recv_img(i)] = this%recv_offset(i)
271 this%dest_offset(i) = dest_xchg(this%send_pe(i))
273 deallocate(dest_xchg)
279 allocate(in_neigh(0:
pe_size - 1))
282 in_neigh(this%send_pe(i)) = .true.
285 in_neigh(this%recv_pe(i)) = .true.
287 n_neigh = count(in_neigh)
288 allocate(this%sync_img(n_neigh))
291 if (in_neigh(i))
then
292 n_neigh = n_neigh + 1
293 this%sync_img(n_neigh) = i + 1
303 call neko_error(
"Coarray Fortran support not built; reconfigure with " // &
304 "a coarray-capable Fortran compiler")
312 class(
gs_caf_t),
intent(inout) :: this
314 if (
allocated(this%send_buf))
deallocate(this%send_buf)
315 if (
allocated(this%send_len))
deallocate(this%send_len)
316 if (
allocated(this%recv_len))
deallocate(this%recv_len)
317 if (
allocated(this%send_offset))
deallocate(this%send_offset)
318 if (
allocated(this%recv_offset))
deallocate(this%recv_offset)
319 if (
allocated(this%dest_offset))
deallocate(this%dest_offset)
320 if (
allocated(this%send_img))
deallocate(this%send_img)
321 if (
allocated(this%recv_img))
deallocate(this%recv_img)
322 if (
allocated(this%sync_img))
deallocate(this%sync_img)
324 call this%free_order()
325 call this%free_dofs()
335 class(
gs_caf_t),
intent(inout) :: this
336 integer,
intent(in) :: n
337 real(kind=
rp),
dimension(n),
intent(inout) :: u
338 type(c_ptr),
intent(inout) :: deps
339 type(c_ptr),
intent(inout) :: strm
341 integer :: i, j, dst, off, dimg, ndst, doff, half_off
342 integer,
pointer :: sp(:)
343 integer(kind=atomic_int_kind) :: flag
346 half_off = this%parity * gs_caf_buf_size
349 do i = 1,
size(this%send_pe)
350 dst = this%send_pe(i)
351 off = this%send_offset(i)
352 ndst = this%send_len(i)
353 dimg = this%send_img(i)
354 doff = this%dest_offset(i)
355 sp => this%send_dof(dst)%array()
356 do concurrent(j = 1:ndst)
357 this%send_buf(off + j) = u(sp(j))
359 gs_caf_recv_buf(half_off + doff + 1 : half_off + doff + ndst)[dimg] &
360 = this%send_buf(off + 1 : off + ndst)
362#ifdef HAVE_COARRAY_EVENTS
367 if (gs_caf_event_in_use)
then
368 call neko_error(
"Event-mode coarray gather-scatter does not " // &
369 "support overlapping gs ops on different instances")
371 gs_caf_event_in_use = .true.
375 if (this%send_started)
then
376 if (
size(this%send_pe) .gt. 0)
then
377 event wait(gs_caf_buf_ready_ev, until_count=
size(this%send_pe))
380 this%send_started = .true.
383 do i = 1,
size(this%send_pe)
384 dst = this%send_pe(i)
385 off = this%send_offset(i)
386 ndst = this%send_len(i)
387 dimg = this%send_img(i)
388 doff = this%dest_offset(i)
389 sp => this%send_dof(dst)%array()
390 do concurrent(j = 1:ndst)
391 this%send_buf(off + j) = u(sp(j))
393 gs_caf_recv_buf(half_off + doff + 1 : half_off + doff + ndst)[dimg] &
394 = this%send_buf(off + 1 : off + ndst)
402 event post(gs_caf_data_ready_ev[dimg])
406 me_rank = this_image() - 1
411 do i = 1,
size(this%send_pe)
412 dst = this%send_pe(i)
413 off = this%send_offset(i)
414 ndst = this%send_len(i)
415 sp => this%send_dof(dst)%array()
416 do concurrent(j = 1:ndst)
417 this%send_buf(off + j) = u(sp(j))
425 do i = 1,
size(this%send_pe)
426 off = this%send_offset(i)
427 ndst = this%send_len(i)
428 dimg = this%send_img(i)
429 doff = this%dest_offset(i)
432 call atomic_ref(flag, gs_caf_buf_ready(this%send_pe(i)))
433 if (int(flag) .ge. gs_caf_send_count(this%send_pe(i)) - 1)
exit
436 gs_caf_recv_buf(half_off + doff + 1 : half_off + doff + ndst)[dimg] &
437 = this%send_buf(off + 1 : off + ndst)
439 gs_caf_send_count(this%send_pe(i)) = &
440 gs_caf_send_count(this%send_pe(i)) + 1
441 call atomic_define(gs_caf_data_ready(me_rank)[dimg], &
442 int(gs_caf_send_count(this%send_pe(i)), atomic_int_kind))
446 call neko_error(
"Coarray Fortran support not built")
453 class(
gs_caf_t),
intent(inout) :: this
461 class(
gs_caf_t),
intent(inout) :: this
462 integer,
intent(in) :: n
463 real(kind=
rp),
dimension(n),
intent(inout) :: u
464 type(c_ptr),
intent(inout) :: strm
467 integer :: i, j, src, off, nsrc, half_off
468 integer,
pointer ::
sp(:)
469 integer(kind=atomic_int_kind) :: flag
472 half_off = this%parity * gs_caf_buf_size
475 if (
allocated(this%sync_img))
then
476 if (
size(this%sync_img) .gt. 0)
then
477 sync images(this%sync_img)
480#ifdef HAVE_COARRAY_EVENTS
482 if (
size(this%recv_pe) .gt. 0)
then
483 event wait(gs_caf_data_ready_ev, until_count=
size(this%recv_pe))
489 do i = 1,
size(this%recv_pe)
490 gs_caf_recv_count(this%recv_pe(i)) = &
491 gs_caf_recv_count(this%recv_pe(i)) + 1
493 call atomic_ref(flag, gs_caf_data_ready(this%recv_pe(i)))
494 if (int(flag) .ge. gs_caf_recv_count(this%recv_pe(i)))
exit
499 do i = 1,
size(this%recv_pe)
500 src = this%recv_pe(i)
501 off = this%recv_offset(i)
502 nsrc = this%recv_len(i)
503 sp => this%recv_dof(src)%array()
507 do concurrent(j = 1:nsrc)
508 u(
sp(j)) = u(
sp(j)) + gs_caf_recv_buf(half_off + off + j)
512 do concurrent(j = 1:nsrc)
513 u(
sp(j)) = u(
sp(j)) * gs_caf_recv_buf(half_off + off + j)
517 do concurrent(j = 1:nsrc)
518 u(
sp(j)) = min(u(
sp(j)), gs_caf_recv_buf(half_off + off + j))
522 do concurrent(j = 1:nsrc)
523 u(
sp(j)) =
max(u(
sp(j)), gs_caf_recv_buf(half_off + off + j))
526 call neko_error(
"Unknown operation in gs_nbwait_caf")
533 me_rank = this_image() - 1
534 do i = 1,
size(this%recv_pe)
535 call atomic_define(gs_caf_buf_ready(me_rank)[this%recv_img(i)], &
536 int(gs_caf_recv_count(this%recv_pe(i)), atomic_int_kind))
538#ifdef HAVE_COARRAY_EVENTS
540 do i = 1,
size(this%recv_pe)
541 event post(gs_caf_buf_ready_ev[this%recv_img(i)])
543 gs_caf_event_in_use = .false.
548 this%parity = 1 - this%parity
550 call neko_error(
"Coarray Fortran support not built")
integer, public pe_size
MPI size of communicator.
Defines Coarray Fortran gather-scatter communication.
subroutine gs_caf_init(this, send_pe, recv_pe)
Initialise Coarray Fortran based communication method.
subroutine gs_nbwait_caf(this, u, n, op, strm)
Wait for all incoming puts and reduce them into u. In sync mode a sync_images bracket pairs with the ...
integer, parameter, public gs_caf_signal_event
integer, parameter, public gs_caf_signal_atomic
subroutine gs_caf_free(this)
Deallocate Coarray Fortran based communication method. The shared module-level recv coarray is intent...
integer, parameter, public gs_caf_signal_sync
subroutine gs_nbrecv_caf(this)
No-op for coarrays: senders push into the receiver's buffer, so the receive side does not need to pos...
subroutine gs_nbsend_caf(this, u, n, deps, strm)
Pack u into per-peer slabs and put each slab into the remote image's recv_buf. Double buffering means...
Defines a gather-scatter communication method.
Defines Gather-scatter operations.
integer, parameter, public gs_op_add
integer, parameter, public gs_op_max
integer, parameter, public gs_op_min
integer, parameter, public gs_op_mul
integer, parameter, public sp
integer, parameter, public rp
Global precision used in computations.
Implements a dynamic stack ADT.
Gather-scatter communication using Coarray Fortran (F2008). Each image puts directly into the (module...
Gather-scatter communication method.