40 use,
intrinsic :: iso_c_binding
41#ifdef HAVE_COARRAY_EVENTS
42 use,
intrinsic :: iso_fortran_env, only : atomic_int_kind, event_type
44 use,
intrinsic :: iso_fortran_env, only : atomic_int_kind
78 real(kind=
rp),
allocatable :: gs_caf_recv_buf(:)[:]
79 integer :: gs_caf_buf_size = 0
84 integer :: gs_caf_mode = 0
91 integer(kind=atomic_int_kind),
allocatable :: gs_caf_data_ready(:)[:]
92 integer(kind=atomic_int_kind),
allocatable :: gs_caf_buf_ready(:)[:]
101 integer,
allocatable :: gs_caf_send_count(:)
102 integer,
allocatable :: gs_caf_recv_count(:)
104#ifdef HAVE_COARRAY_EVENTS
116 type(event_type),
allocatable :: gs_caf_data_ready_ev[:]
117 type(event_type),
allocatable :: gs_caf_buf_ready_ev[:]
118 logical :: gs_caf_event_in_use = .false.
131 real(kind=
rp),
allocatable :: send_buf(:)
133 integer,
allocatable :: send_len(:), recv_len(:)
135 integer,
allocatable :: send_offset(:), recv_offset(:)
137 integer,
allocatable :: dest_offset(:)
139 integer,
allocatable :: send_img(:), recv_img(:)
143 integer,
allocatable :: sync_img(:)
146 logical :: send_started = .false.
150 integer :: parity = 0
163 class(
gs_caf_t),
intent(inout) :: this
167 integer,
allocatable :: dest_xchg(:)[:]
168 logical,
allocatable :: in_neigh(:)
169 integer :: i, nsend, nrecv, send_total, recv_total, max_total, n_neigh
170 integer :: me, env_len
171 character(len=64) :: env_val
174 if (gs_caf_mode .eq. 0)
then
175 call get_environment_variable(
"NEKO_GS_CAF_SIGNALING", env_val, env_len)
176 if (env_len .gt. 0 .and. env_val(1:env_len) .eq.
"atomic")
then
178 allocate(gs_caf_data_ready(0:
pe_size - 1)[*])
179 allocate(gs_caf_buf_ready(0:
pe_size - 1)[*])
180 allocate(gs_caf_send_count(0:
pe_size - 1))
181 allocate(gs_caf_recv_count(0:
pe_size - 1))
182 gs_caf_send_count = 0
183 gs_caf_recv_count = 0
188 call atomic_define(gs_caf_data_ready(i), 0_atomic_int_kind)
189 call atomic_define(gs_caf_buf_ready(i), 0_atomic_int_kind)
191 else if (env_len .gt. 0 .and. env_val(1:env_len) .eq.
"event")
then
192#ifdef HAVE_COARRAY_EVENTS
195 call neko_error(
"NEKO_GS_CAF_SIGNALING=event requires a Fortran " // &
196 "compiler with coarray events support")
203#ifdef HAVE_COARRAY_EVENTS
209 if (.not.
allocated(gs_caf_data_ready_ev))
then
210 allocate(gs_caf_data_ready_ev[*])
211 allocate(gs_caf_buf_ready_ev[*])
216 call this%init_order(send_pe, recv_pe)
218 nsend =
size(this%send_pe)
219 nrecv =
size(this%recv_pe)
221 allocate(this%send_len(nsend), this%send_offset(nsend), &
222 this%send_img(nsend), this%dest_offset(nsend))
223 allocate(this%recv_len(nrecv), this%recv_offset(nrecv), &
224 this%recv_img(nrecv))
229 this%recv_len(i) = this%recv_dof(this%recv_pe(i))%size()
230 this%recv_offset(i) = recv_total
231 recv_total = recv_total + this%recv_len(i)
232 this%recv_img(i) = this%recv_pe(i) + 1
238 this%send_len(i) = this%send_dof(this%send_pe(i))%size()
239 this%send_offset(i) = send_total
240 send_total = send_total + this%send_len(i)
241 this%send_img(i) = this%send_pe(i) + 1
243 allocate(this%send_buf(
max(1, send_total)))
250 max_total = recv_total
251 call co_max(max_total)
252 max_total =
max(1, max_total)
253 if (max_total .gt. gs_caf_buf_size)
then
254 if (
allocated(gs_caf_recv_buf))
deallocate(gs_caf_recv_buf)
255 allocate(gs_caf_recv_buf(2 * max_total)[*])
256 gs_caf_buf_size = max_total
265 allocate(dest_xchg(0:
pe_size - 1)[*])
267 dest_xchg(me - 1)[this%recv_img(i)] = this%recv_offset(i)
271 this%dest_offset(i) = dest_xchg(this%send_pe(i))
273 deallocate(dest_xchg)
279 allocate(in_neigh(0:
pe_size - 1))
282 in_neigh(this%send_pe(i)) = .true.
285 in_neigh(this%recv_pe(i)) = .true.
287 n_neigh = count(in_neigh)
288 allocate(this%sync_img(n_neigh))
291 if (in_neigh(i))
then
292 n_neigh = n_neigh + 1
293 this%sync_img(n_neigh) = i + 1
303 call neko_error(
"Coarray Fortran support not built; reconfigure with " // &
304 "a coarray-capable Fortran compiler")
312 class(
gs_caf_t),
intent(inout) :: this
314 if (
allocated(this%send_buf))
deallocate(this%send_buf)
315 if (
allocated(this%send_len))
deallocate(this%send_len)
316 if (
allocated(this%recv_len))
deallocate(this%recv_len)
317 if (
allocated(this%send_offset))
deallocate(this%send_offset)
318 if (
allocated(this%recv_offset))
deallocate(this%recv_offset)
319 if (
allocated(this%dest_offset))
deallocate(this%dest_offset)
320 if (
allocated(this%send_img))
deallocate(this%send_img)
321 if (
allocated(this%recv_img))
deallocate(this%recv_img)
322 if (
allocated(this%sync_img))
deallocate(this%sync_img)
324 call this%free_order()
325 call this%free_dofs()
335 class(
gs_caf_t),
intent(inout) :: this
336 integer,
intent(in) :: n
337 real(kind=
rp),
dimension(n),
intent(inout) :: u
338 integer,
intent(in) :: tag
339 type(c_ptr),
intent(inout) :: deps
340 type(c_ptr),
intent(inout) :: strm
342 integer :: i, j, dst, off, dimg, ndst, doff, half_off
343 integer,
pointer :: sp(:)
344 integer(kind=atomic_int_kind) :: flag
347 half_off = this%parity * gs_caf_buf_size
350 do i = 1,
size(this%send_pe)
351 dst = this%send_pe(i)
352 off = this%send_offset(i)
353 ndst = this%send_len(i)
354 dimg = this%send_img(i)
355 doff = this%dest_offset(i)
356 sp => this%send_dof(dst)%array()
357 do concurrent(j = 1:ndst)
358 this%send_buf(off + j) = u(sp(j))
360 gs_caf_recv_buf(half_off + doff + 1 : half_off + doff + ndst)[dimg] &
361 = this%send_buf(off + 1 : off + ndst)
363#ifdef HAVE_COARRAY_EVENTS
368 if (gs_caf_event_in_use)
then
369 call neko_error(
"Event-mode coarray gather-scatter does not " // &
370 "support overlapping gs ops on different instances")
372 gs_caf_event_in_use = .true.
376 if (this%send_started)
then
377 if (
size(this%send_pe) .gt. 0)
then
378 event wait(gs_caf_buf_ready_ev, until_count=
size(this%send_pe))
381 this%send_started = .true.
384 do i = 1,
size(this%send_pe)
385 dst = this%send_pe(i)
386 off = this%send_offset(i)
387 ndst = this%send_len(i)
388 dimg = this%send_img(i)
389 doff = this%dest_offset(i)
390 sp => this%send_dof(dst)%array()
391 do concurrent(j = 1:ndst)
392 this%send_buf(off + j) = u(sp(j))
394 gs_caf_recv_buf(half_off + doff + 1 : half_off + doff + ndst)[dimg] &
395 = this%send_buf(off + 1 : off + ndst)
403 event post(gs_caf_data_ready_ev[dimg])
407 me_rank = this_image() - 1
412 do i = 1,
size(this%send_pe)
413 dst = this%send_pe(i)
414 off = this%send_offset(i)
415 ndst = this%send_len(i)
416 sp => this%send_dof(dst)%array()
417 do concurrent(j = 1:ndst)
418 this%send_buf(off + j) = u(sp(j))
426 do i = 1,
size(this%send_pe)
427 off = this%send_offset(i)
428 ndst = this%send_len(i)
429 dimg = this%send_img(i)
430 doff = this%dest_offset(i)
433 call atomic_ref(flag, gs_caf_buf_ready(this%send_pe(i)))
434 if (int(flag) .ge. gs_caf_send_count(this%send_pe(i)) - 1)
exit
437 gs_caf_recv_buf(half_off + doff + 1 : half_off + doff + ndst)[dimg] &
438 = this%send_buf(off + 1 : off + ndst)
440 gs_caf_send_count(this%send_pe(i)) = &
441 gs_caf_send_count(this%send_pe(i)) + 1
442 call atomic_define(gs_caf_data_ready(me_rank)[dimg], &
443 int(gs_caf_send_count(this%send_pe(i)), atomic_int_kind))
447 call neko_error(
"Coarray Fortran support not built")
454 class(
gs_caf_t),
intent(inout) :: this
455 integer,
intent(in) :: tag
463 class(
gs_caf_t),
intent(inout) :: this
464 integer,
intent(in) :: n
465 real(kind=
rp),
dimension(n),
intent(inout) :: u
466 type(c_ptr),
intent(inout) :: strm
469 integer :: i, j, src, off, nsrc, half_off
470 integer,
pointer ::
sp(:)
471 integer(kind=atomic_int_kind) :: flag
474 half_off = this%parity * gs_caf_buf_size
477 if (
allocated(this%sync_img))
then
478 if (
size(this%sync_img) .gt. 0)
then
479 sync images(this%sync_img)
482#ifdef HAVE_COARRAY_EVENTS
484 if (
size(this%recv_pe) .gt. 0)
then
485 event wait(gs_caf_data_ready_ev, until_count=
size(this%recv_pe))
491 do i = 1,
size(this%recv_pe)
492 gs_caf_recv_count(this%recv_pe(i)) = &
493 gs_caf_recv_count(this%recv_pe(i)) + 1
495 call atomic_ref(flag, gs_caf_data_ready(this%recv_pe(i)))
496 if (int(flag) .ge. gs_caf_recv_count(this%recv_pe(i)))
exit
501 do i = 1,
size(this%recv_pe)
502 src = this%recv_pe(i)
503 off = this%recv_offset(i)
504 nsrc = this%recv_len(i)
505 sp => this%recv_dof(src)%array()
509 do concurrent(j = 1:nsrc)
510 u(
sp(j)) = u(
sp(j)) + gs_caf_recv_buf(half_off + off + j)
514 do concurrent(j = 1:nsrc)
515 u(
sp(j)) = u(
sp(j)) * gs_caf_recv_buf(half_off + off + j)
519 do concurrent(j = 1:nsrc)
520 u(
sp(j)) = min(u(
sp(j)), gs_caf_recv_buf(half_off + off + j))
524 do concurrent(j = 1:nsrc)
525 u(
sp(j)) =
max(u(
sp(j)), gs_caf_recv_buf(half_off + off + j))
528 call neko_error(
"Unknown operation in gs_nbwait_caf")
535 me_rank = this_image() - 1
536 do i = 1,
size(this%recv_pe)
537 call atomic_define(gs_caf_buf_ready(me_rank)[this%recv_img(i)], &
538 int(gs_caf_recv_count(this%recv_pe(i)), atomic_int_kind))
540#ifdef HAVE_COARRAY_EVENTS
542 do i = 1,
size(this%recv_pe)
543 event post(gs_caf_buf_ready_ev[this%recv_img(i)])
545 gs_caf_event_in_use = .false.
550 this%parity = 1 - this%parity
552 call neko_error(
"Coarray Fortran support not built")
integer, public pe_size
MPI size of communicator.
Defines Coarray Fortran gather-scatter communication.
subroutine gs_nbrecv_caf(this, tag)
No-op for coarrays: senders push into the receiver's buffer, so the receive side does not need to pos...
subroutine gs_nbsend_caf(this, u, n, tag, deps, strm)
Pack u into per-peer slabs and put each slab into the remote image's recv_buf. Double buffering means...
subroutine gs_caf_init(this, send_pe, recv_pe)
Initialise Coarray Fortran based communication method.
subroutine gs_nbwait_caf(this, u, n, op, strm)
Wait for all incoming puts and reduce them into u. In sync mode a sync_images bracket pairs with the ...
integer, parameter, public gs_caf_signal_event
integer, parameter, public gs_caf_signal_atomic
subroutine gs_caf_free(this)
Deallocate Coarray Fortran based communication method. The shared module-level recv coarray is intent...
integer, parameter, public gs_caf_signal_sync
Defines a gather-scatter communication method.
Defines Gather-scatter operations.
integer, parameter, public gs_op_add
integer, parameter, public gs_op_max
integer, parameter, public gs_op_min
integer, parameter, public gs_op_mul
integer, parameter, public sp
integer, parameter, public rp
Global precision used in computations.
Implements a dynamic stack ADT.
Gather-scatter communication using Coarray Fortran (F2008). Each image puts directly into the (module...
Gather-scatter communication method.